unstructured-ingest 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (557) hide show
  1. test/__init__.py +0 -0
  2. test/integration/__init__.py +0 -0
  3. test/integration/chunkers/__init__.py +0 -0
  4. test/integration/chunkers/test_chunkers.py +31 -0
  5. test/integration/connectors/__init__.py +0 -0
  6. test/integration/connectors/conftest.py +38 -0
  7. test/integration/connectors/databricks/__init__.py +0 -0
  8. test/integration/connectors/databricks/test_volumes_native.py +269 -0
  9. test/integration/connectors/discord/__init__.py +0 -0
  10. test/integration/connectors/discord/test_discord.py +90 -0
  11. test/integration/connectors/duckdb/__init__.py +0 -0
  12. test/integration/connectors/duckdb/conftest.py +14 -0
  13. test/integration/connectors/duckdb/test_duckdb.py +89 -0
  14. test/integration/connectors/duckdb/test_motherduck.py +95 -0
  15. test/integration/connectors/elasticsearch/__init__.py +0 -0
  16. test/integration/connectors/elasticsearch/conftest.py +34 -0
  17. test/integration/connectors/elasticsearch/test_elasticsearch.py +330 -0
  18. test/integration/connectors/elasticsearch/test_opensearch.py +325 -0
  19. test/integration/connectors/sql/__init__.py +0 -0
  20. test/integration/connectors/sql/test_postgres.py +195 -0
  21. test/integration/connectors/sql/test_singlestore.py +176 -0
  22. test/integration/connectors/sql/test_snowflake.py +238 -0
  23. test/integration/connectors/sql/test_sqlite.py +162 -0
  24. test/integration/connectors/test_astradb.py +217 -0
  25. test/integration/connectors/test_azure_ai_search.py +255 -0
  26. test/integration/connectors/test_chroma.py +120 -0
  27. test/integration/connectors/test_confluence.py +113 -0
  28. test/integration/connectors/test_delta_table.py +185 -0
  29. test/integration/connectors/test_lancedb.py +247 -0
  30. test/integration/connectors/test_milvus.py +203 -0
  31. test/integration/connectors/test_mongodb.py +335 -0
  32. test/integration/connectors/test_neo4j.py +236 -0
  33. test/integration/connectors/test_notion.py +145 -0
  34. test/integration/connectors/test_onedrive.py +118 -0
  35. test/integration/connectors/test_pinecone.py +288 -0
  36. test/integration/connectors/test_qdrant.py +215 -0
  37. test/integration/connectors/test_redis.py +119 -0
  38. test/integration/connectors/test_s3.py +183 -0
  39. test/integration/connectors/test_vectara.py +270 -0
  40. test/integration/connectors/utils/__init__.py +0 -0
  41. test/integration/connectors/utils/constants.py +7 -0
  42. test/integration/connectors/utils/docker.py +151 -0
  43. test/integration/connectors/utils/docker_compose.py +59 -0
  44. test/integration/connectors/utils/validation/__init__.py +0 -0
  45. test/integration/connectors/utils/validation/destination.py +75 -0
  46. test/integration/connectors/utils/validation/equality.py +75 -0
  47. test/integration/connectors/utils/validation/source.py +299 -0
  48. test/integration/connectors/utils/validation/utils.py +36 -0
  49. test/integration/connectors/weaviate/__init__.py +0 -0
  50. test/integration/connectors/weaviate/conftest.py +15 -0
  51. test/integration/connectors/weaviate/test_cloud.py +34 -0
  52. test/integration/connectors/weaviate/test_local.py +131 -0
  53. test/integration/embedders/__init__.py +0 -0
  54. test/integration/embedders/conftest.py +13 -0
  55. test/integration/embedders/test_azure_openai.py +59 -0
  56. test/integration/embedders/test_bedrock.py +103 -0
  57. test/integration/embedders/test_huggingface.py +26 -0
  58. test/integration/embedders/test_mixedbread.py +71 -0
  59. test/integration/embedders/test_octoai.py +77 -0
  60. test/integration/embedders/test_openai.py +76 -0
  61. test/integration/embedders/test_togetherai.py +71 -0
  62. test/integration/embedders/test_vertexai.py +65 -0
  63. test/integration/embedders/test_voyageai.py +65 -0
  64. test/integration/embedders/utils.py +68 -0
  65. test/integration/partitioners/__init__.py +0 -0
  66. test/integration/partitioners/test_partitioner.py +75 -0
  67. test/integration/utils.py +15 -0
  68. test/unit/__init__.py +0 -0
  69. test/unit/embed/__init__.py +0 -0
  70. test/unit/embed/test_mixedbreadai.py +42 -0
  71. test/unit/embed/test_octoai.py +27 -0
  72. test/unit/embed/test_openai.py +20 -0
  73. test/unit/embed/test_vertexai.py +25 -0
  74. test/unit/embed/test_voyageai.py +24 -0
  75. test/unit/test_error.py +27 -0
  76. test/unit/test_logger.py +78 -0
  77. test/unit/test_utils.py +184 -0
  78. test/unit/v2/__init__.py +0 -0
  79. test/unit/v2/chunkers/__init__.py +0 -0
  80. test/unit/v2/chunkers/test_chunkers.py +49 -0
  81. test/unit/v2/connectors/__init__.py +0 -0
  82. test/unit/v2/connectors/test_confluence.py +39 -0
  83. test/unit/v2/embedders/__init__.py +0 -0
  84. test/unit/v2/embedders/test_bedrock.py +36 -0
  85. test/unit/v2/embedders/test_huggingface.py +48 -0
  86. test/unit/v2/embedders/test_mixedbread.py +37 -0
  87. test/unit/v2/embedders/test_octoai.py +35 -0
  88. test/unit/v2/embedders/test_openai.py +35 -0
  89. test/unit/v2/embedders/test_togetherai.py +37 -0
  90. test/unit/v2/embedders/test_vertexai.py +37 -0
  91. test/unit/v2/embedders/test_voyageai.py +38 -0
  92. test/unit/v2/partitioners/__init__.py +0 -0
  93. test/unit/v2/partitioners/test_partitioner.py +63 -0
  94. test/unit/v2/test_interfaces.py +26 -0
  95. test/unit/v2/test_utils.py +82 -0
  96. test/unit/v2/utils/__init__.py +0 -0
  97. test/unit/v2/utils/data_generator.py +32 -0
  98. unstructured_ingest/__init__.py +1 -0
  99. unstructured_ingest/__version__.py +1 -0
  100. unstructured_ingest/cli/__init__.py +14 -0
  101. unstructured_ingest/cli/base/__init__.py +0 -0
  102. unstructured_ingest/cli/base/cmd.py +19 -0
  103. unstructured_ingest/cli/base/dest.py +87 -0
  104. unstructured_ingest/cli/base/src.py +57 -0
  105. unstructured_ingest/cli/cli.py +37 -0
  106. unstructured_ingest/cli/cmd_factory.py +12 -0
  107. unstructured_ingest/cli/cmds/__init__.py +145 -0
  108. unstructured_ingest/cli/cmds/airtable.py +69 -0
  109. unstructured_ingest/cli/cmds/astradb.py +99 -0
  110. unstructured_ingest/cli/cmds/azure_ai_search.py +65 -0
  111. unstructured_ingest/cli/cmds/biomed.py +52 -0
  112. unstructured_ingest/cli/cmds/chroma.py +104 -0
  113. unstructured_ingest/cli/cmds/clarifai.py +71 -0
  114. unstructured_ingest/cli/cmds/confluence.py +69 -0
  115. unstructured_ingest/cli/cmds/databricks_volumes.py +163 -0
  116. unstructured_ingest/cli/cmds/delta_table.py +94 -0
  117. unstructured_ingest/cli/cmds/discord.py +47 -0
  118. unstructured_ingest/cli/cmds/elasticsearch.py +133 -0
  119. unstructured_ingest/cli/cmds/fsspec/__init__.py +0 -0
  120. unstructured_ingest/cli/cmds/fsspec/azure.py +94 -0
  121. unstructured_ingest/cli/cmds/fsspec/box.py +48 -0
  122. unstructured_ingest/cli/cmds/fsspec/dropbox.py +51 -0
  123. unstructured_ingest/cli/cmds/fsspec/fsspec.py +15 -0
  124. unstructured_ingest/cli/cmds/fsspec/gcs.py +71 -0
  125. unstructured_ingest/cli/cmds/fsspec/s3.py +74 -0
  126. unstructured_ingest/cli/cmds/fsspec/sftp.py +58 -0
  127. unstructured_ingest/cli/cmds/github.py +54 -0
  128. unstructured_ingest/cli/cmds/gitlab.py +54 -0
  129. unstructured_ingest/cli/cmds/google_drive.py +49 -0
  130. unstructured_ingest/cli/cmds/hubspot.py +70 -0
  131. unstructured_ingest/cli/cmds/jira.py +71 -0
  132. unstructured_ingest/cli/cmds/kafka.py +102 -0
  133. unstructured_ingest/cli/cmds/local.py +43 -0
  134. unstructured_ingest/cli/cmds/mongodb.py +72 -0
  135. unstructured_ingest/cli/cmds/notion.py +48 -0
  136. unstructured_ingest/cli/cmds/onedrive.py +66 -0
  137. unstructured_ingest/cli/cmds/opensearch.py +117 -0
  138. unstructured_ingest/cli/cmds/outlook.py +67 -0
  139. unstructured_ingest/cli/cmds/pinecone.py +71 -0
  140. unstructured_ingest/cli/cmds/qdrant.py +124 -0
  141. unstructured_ingest/cli/cmds/reddit.py +67 -0
  142. unstructured_ingest/cli/cmds/salesforce.py +58 -0
  143. unstructured_ingest/cli/cmds/sharepoint.py +66 -0
  144. unstructured_ingest/cli/cmds/slack.py +56 -0
  145. unstructured_ingest/cli/cmds/sql.py +66 -0
  146. unstructured_ingest/cli/cmds/vectara.py +66 -0
  147. unstructured_ingest/cli/cmds/weaviate.py +98 -0
  148. unstructured_ingest/cli/cmds/wikipedia.py +40 -0
  149. unstructured_ingest/cli/common.py +7 -0
  150. unstructured_ingest/cli/interfaces.py +663 -0
  151. unstructured_ingest/cli/utils.py +205 -0
  152. unstructured_ingest/connector/__init__.py +0 -0
  153. unstructured_ingest/connector/airtable.py +309 -0
  154. unstructured_ingest/connector/astradb.py +267 -0
  155. unstructured_ingest/connector/azure_ai_search.py +144 -0
  156. unstructured_ingest/connector/biomed.py +320 -0
  157. unstructured_ingest/connector/chroma.py +158 -0
  158. unstructured_ingest/connector/clarifai.py +122 -0
  159. unstructured_ingest/connector/confluence.py +285 -0
  160. unstructured_ingest/connector/databricks_volumes.py +137 -0
  161. unstructured_ingest/connector/delta_table.py +203 -0
  162. unstructured_ingest/connector/discord.py +180 -0
  163. unstructured_ingest/connector/elasticsearch.py +396 -0
  164. unstructured_ingest/connector/fsspec/__init__.py +0 -0
  165. unstructured_ingest/connector/fsspec/azure.py +78 -0
  166. unstructured_ingest/connector/fsspec/box.py +109 -0
  167. unstructured_ingest/connector/fsspec/dropbox.py +160 -0
  168. unstructured_ingest/connector/fsspec/fsspec.py +359 -0
  169. unstructured_ingest/connector/fsspec/gcs.py +82 -0
  170. unstructured_ingest/connector/fsspec/s3.py +62 -0
  171. unstructured_ingest/connector/fsspec/sftp.py +81 -0
  172. unstructured_ingest/connector/git.py +124 -0
  173. unstructured_ingest/connector/github.py +174 -0
  174. unstructured_ingest/connector/gitlab.py +142 -0
  175. unstructured_ingest/connector/google_drive.py +348 -0
  176. unstructured_ingest/connector/hubspot.py +278 -0
  177. unstructured_ingest/connector/jira.py +469 -0
  178. unstructured_ingest/connector/kafka.py +293 -0
  179. unstructured_ingest/connector/local.py +139 -0
  180. unstructured_ingest/connector/mongodb.py +284 -0
  181. unstructured_ingest/connector/notion/__init__.py +0 -0
  182. unstructured_ingest/connector/notion/client.py +248 -0
  183. unstructured_ingest/connector/notion/connector.py +469 -0
  184. unstructured_ingest/connector/notion/helpers.py +584 -0
  185. unstructured_ingest/connector/notion/interfaces.py +32 -0
  186. unstructured_ingest/connector/notion/types/__init__.py +0 -0
  187. unstructured_ingest/connector/notion/types/block.py +96 -0
  188. unstructured_ingest/connector/notion/types/blocks/__init__.py +63 -0
  189. unstructured_ingest/connector/notion/types/blocks/bookmark.py +40 -0
  190. unstructured_ingest/connector/notion/types/blocks/breadcrumb.py +21 -0
  191. unstructured_ingest/connector/notion/types/blocks/bulleted_list_item.py +31 -0
  192. unstructured_ingest/connector/notion/types/blocks/callout.py +94 -0
  193. unstructured_ingest/connector/notion/types/blocks/child_database.py +23 -0
  194. unstructured_ingest/connector/notion/types/blocks/child_page.py +23 -0
  195. unstructured_ingest/connector/notion/types/blocks/code.py +43 -0
  196. unstructured_ingest/connector/notion/types/blocks/column_list.py +35 -0
  197. unstructured_ingest/connector/notion/types/blocks/divider.py +22 -0
  198. unstructured_ingest/connector/notion/types/blocks/embed.py +36 -0
  199. unstructured_ingest/connector/notion/types/blocks/equation.py +23 -0
  200. unstructured_ingest/connector/notion/types/blocks/file.py +49 -0
  201. unstructured_ingest/connector/notion/types/blocks/heading.py +37 -0
  202. unstructured_ingest/connector/notion/types/blocks/image.py +21 -0
  203. unstructured_ingest/connector/notion/types/blocks/link_preview.py +24 -0
  204. unstructured_ingest/connector/notion/types/blocks/link_to_page.py +29 -0
  205. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +29 -0
  206. unstructured_ingest/connector/notion/types/blocks/paragraph.py +31 -0
  207. unstructured_ingest/connector/notion/types/blocks/pdf.py +49 -0
  208. unstructured_ingest/connector/notion/types/blocks/quote.py +37 -0
  209. unstructured_ingest/connector/notion/types/blocks/synced_block.py +57 -0
  210. unstructured_ingest/connector/notion/types/blocks/table.py +63 -0
  211. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +23 -0
  212. unstructured_ingest/connector/notion/types/blocks/template.py +30 -0
  213. unstructured_ingest/connector/notion/types/blocks/todo.py +42 -0
  214. unstructured_ingest/connector/notion/types/blocks/toggle.py +37 -0
  215. unstructured_ingest/connector/notion/types/blocks/unsupported.py +20 -0
  216. unstructured_ingest/connector/notion/types/blocks/video.py +22 -0
  217. unstructured_ingest/connector/notion/types/database.py +73 -0
  218. unstructured_ingest/connector/notion/types/database_properties/__init__.py +106 -0
  219. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +38 -0
  220. unstructured_ingest/connector/notion/types/database_properties/created_by.py +35 -0
  221. unstructured_ingest/connector/notion/types/database_properties/created_time.py +34 -0
  222. unstructured_ingest/connector/notion/types/database_properties/date.py +41 -0
  223. unstructured_ingest/connector/notion/types/database_properties/email.py +36 -0
  224. unstructured_ingest/connector/notion/types/database_properties/files.py +37 -0
  225. unstructured_ingest/connector/notion/types/database_properties/formula.py +49 -0
  226. unstructured_ingest/connector/notion/types/database_properties/last_edited_by.py +34 -0
  227. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +34 -0
  228. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +73 -0
  229. unstructured_ingest/connector/notion/types/database_properties/number.py +49 -0
  230. unstructured_ingest/connector/notion/types/database_properties/people.py +40 -0
  231. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +36 -0
  232. unstructured_ingest/connector/notion/types/database_properties/relation.py +67 -0
  233. unstructured_ingest/connector/notion/types/database_properties/rich_text.py +43 -0
  234. unstructured_ingest/connector/notion/types/database_properties/rollup.py +56 -0
  235. unstructured_ingest/connector/notion/types/database_properties/select.py +68 -0
  236. unstructured_ingest/connector/notion/types/database_properties/status.py +80 -0
  237. unstructured_ingest/connector/notion/types/database_properties/title.py +37 -0
  238. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +50 -0
  239. unstructured_ingest/connector/notion/types/database_properties/url.py +37 -0
  240. unstructured_ingest/connector/notion/types/database_properties/verification.py +78 -0
  241. unstructured_ingest/connector/notion/types/date.py +26 -0
  242. unstructured_ingest/connector/notion/types/file.py +51 -0
  243. unstructured_ingest/connector/notion/types/page.py +45 -0
  244. unstructured_ingest/connector/notion/types/parent.py +66 -0
  245. unstructured_ingest/connector/notion/types/rich_text.py +189 -0
  246. unstructured_ingest/connector/notion/types/user.py +76 -0
  247. unstructured_ingest/connector/onedrive.py +232 -0
  248. unstructured_ingest/connector/opensearch.py +218 -0
  249. unstructured_ingest/connector/outlook.py +285 -0
  250. unstructured_ingest/connector/pinecone.py +140 -0
  251. unstructured_ingest/connector/qdrant.py +144 -0
  252. unstructured_ingest/connector/reddit.py +166 -0
  253. unstructured_ingest/connector/registry.py +109 -0
  254. unstructured_ingest/connector/salesforce.py +301 -0
  255. unstructured_ingest/connector/sharepoint.py +573 -0
  256. unstructured_ingest/connector/slack.py +224 -0
  257. unstructured_ingest/connector/sql.py +199 -0
  258. unstructured_ingest/connector/vectara.py +253 -0
  259. unstructured_ingest/connector/weaviate.py +190 -0
  260. unstructured_ingest/connector/wikipedia.py +208 -0
  261. unstructured_ingest/embed/__init__.py +0 -0
  262. unstructured_ingest/embed/azure_openai.py +31 -0
  263. unstructured_ingest/embed/bedrock.py +193 -0
  264. unstructured_ingest/embed/huggingface.py +52 -0
  265. unstructured_ingest/embed/interfaces.py +117 -0
  266. unstructured_ingest/embed/mixedbreadai.py +233 -0
  267. unstructured_ingest/embed/octoai.py +130 -0
  268. unstructured_ingest/embed/openai.py +116 -0
  269. unstructured_ingest/embed/togetherai.py +106 -0
  270. unstructured_ingest/embed/vertexai.py +126 -0
  271. unstructured_ingest/embed/voyageai.py +130 -0
  272. unstructured_ingest/enhanced_dataclass/__init__.py +4 -0
  273. unstructured_ingest/enhanced_dataclass/core.py +99 -0
  274. unstructured_ingest/enhanced_dataclass/dataclasses.py +54 -0
  275. unstructured_ingest/enhanced_dataclass/json_mixin.py +125 -0
  276. unstructured_ingest/error.py +49 -0
  277. unstructured_ingest/ingest_backoff/__init__.py +3 -0
  278. unstructured_ingest/ingest_backoff/_common.py +102 -0
  279. unstructured_ingest/ingest_backoff/_wrapper.py +122 -0
  280. unstructured_ingest/interfaces.py +852 -0
  281. unstructured_ingest/logger.py +130 -0
  282. unstructured_ingest/main.py +11 -0
  283. unstructured_ingest/pipeline/__init__.py +22 -0
  284. unstructured_ingest/pipeline/copy.py +19 -0
  285. unstructured_ingest/pipeline/doc_factory.py +12 -0
  286. unstructured_ingest/pipeline/interfaces.py +270 -0
  287. unstructured_ingest/pipeline/partition.py +60 -0
  288. unstructured_ingest/pipeline/permissions.py +12 -0
  289. unstructured_ingest/pipeline/pipeline.py +117 -0
  290. unstructured_ingest/pipeline/reformat/__init__.py +0 -0
  291. unstructured_ingest/pipeline/reformat/chunking.py +134 -0
  292. unstructured_ingest/pipeline/reformat/embedding.py +64 -0
  293. unstructured_ingest/pipeline/source.py +77 -0
  294. unstructured_ingest/pipeline/utils.py +6 -0
  295. unstructured_ingest/pipeline/write.py +18 -0
  296. unstructured_ingest/processor.py +93 -0
  297. unstructured_ingest/runner/__init__.py +104 -0
  298. unstructured_ingest/runner/airtable.py +35 -0
  299. unstructured_ingest/runner/astradb.py +34 -0
  300. unstructured_ingest/runner/base_runner.py +89 -0
  301. unstructured_ingest/runner/biomed.py +45 -0
  302. unstructured_ingest/runner/confluence.py +35 -0
  303. unstructured_ingest/runner/delta_table.py +34 -0
  304. unstructured_ingest/runner/discord.py +35 -0
  305. unstructured_ingest/runner/elasticsearch.py +40 -0
  306. unstructured_ingest/runner/fsspec/__init__.py +0 -0
  307. unstructured_ingest/runner/fsspec/azure.py +30 -0
  308. unstructured_ingest/runner/fsspec/box.py +28 -0
  309. unstructured_ingest/runner/fsspec/dropbox.py +30 -0
  310. unstructured_ingest/runner/fsspec/fsspec.py +40 -0
  311. unstructured_ingest/runner/fsspec/gcs.py +28 -0
  312. unstructured_ingest/runner/fsspec/s3.py +28 -0
  313. unstructured_ingest/runner/fsspec/sftp.py +28 -0
  314. unstructured_ingest/runner/github.py +37 -0
  315. unstructured_ingest/runner/gitlab.py +37 -0
  316. unstructured_ingest/runner/google_drive.py +35 -0
  317. unstructured_ingest/runner/hubspot.py +35 -0
  318. unstructured_ingest/runner/jira.py +35 -0
  319. unstructured_ingest/runner/kafka.py +34 -0
  320. unstructured_ingest/runner/local.py +23 -0
  321. unstructured_ingest/runner/mongodb.py +34 -0
  322. unstructured_ingest/runner/notion.py +61 -0
  323. unstructured_ingest/runner/onedrive.py +35 -0
  324. unstructured_ingest/runner/opensearch.py +40 -0
  325. unstructured_ingest/runner/outlook.py +33 -0
  326. unstructured_ingest/runner/reddit.py +35 -0
  327. unstructured_ingest/runner/salesforce.py +33 -0
  328. unstructured_ingest/runner/sharepoint.py +35 -0
  329. unstructured_ingest/runner/slack.py +33 -0
  330. unstructured_ingest/runner/utils.py +47 -0
  331. unstructured_ingest/runner/wikipedia.py +35 -0
  332. unstructured_ingest/runner/writers/__init__.py +48 -0
  333. unstructured_ingest/runner/writers/astradb.py +22 -0
  334. unstructured_ingest/runner/writers/azure_ai_search.py +24 -0
  335. unstructured_ingest/runner/writers/base_writer.py +26 -0
  336. unstructured_ingest/runner/writers/chroma.py +22 -0
  337. unstructured_ingest/runner/writers/clarifai.py +19 -0
  338. unstructured_ingest/runner/writers/databricks_volumes.py +25 -0
  339. unstructured_ingest/runner/writers/delta_table.py +24 -0
  340. unstructured_ingest/runner/writers/elasticsearch.py +24 -0
  341. unstructured_ingest/runner/writers/fsspec/__init__.py +0 -0
  342. unstructured_ingest/runner/writers/fsspec/azure.py +24 -0
  343. unstructured_ingest/runner/writers/fsspec/box.py +21 -0
  344. unstructured_ingest/runner/writers/fsspec/dropbox.py +21 -0
  345. unstructured_ingest/runner/writers/fsspec/gcs.py +19 -0
  346. unstructured_ingest/runner/writers/fsspec/s3.py +21 -0
  347. unstructured_ingest/runner/writers/kafka.py +21 -0
  348. unstructured_ingest/runner/writers/mongodb.py +21 -0
  349. unstructured_ingest/runner/writers/opensearch.py +26 -0
  350. unstructured_ingest/runner/writers/pinecone.py +21 -0
  351. unstructured_ingest/runner/writers/qdrant.py +19 -0
  352. unstructured_ingest/runner/writers/sql.py +22 -0
  353. unstructured_ingest/runner/writers/vectara.py +22 -0
  354. unstructured_ingest/runner/writers/weaviate.py +21 -0
  355. unstructured_ingest/utils/__init__.py +0 -0
  356. unstructured_ingest/utils/chunking.py +56 -0
  357. unstructured_ingest/utils/compression.py +118 -0
  358. unstructured_ingest/utils/data_prep.py +200 -0
  359. unstructured_ingest/utils/dep_check.py +78 -0
  360. unstructured_ingest/utils/google_filetype.py +9 -0
  361. unstructured_ingest/utils/string_and_date_utils.py +49 -0
  362. unstructured_ingest/utils/table.py +73 -0
  363. unstructured_ingest/v2/__init__.py +1 -0
  364. unstructured_ingest/v2/cli/__init__.py +0 -0
  365. unstructured_ingest/v2/cli/base/__init__.py +4 -0
  366. unstructured_ingest/v2/cli/base/cmd.py +269 -0
  367. unstructured_ingest/v2/cli/base/dest.py +85 -0
  368. unstructured_ingest/v2/cli/base/importer.py +34 -0
  369. unstructured_ingest/v2/cli/base/src.py +85 -0
  370. unstructured_ingest/v2/cli/cli.py +24 -0
  371. unstructured_ingest/v2/cli/cmds.py +14 -0
  372. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  373. unstructured_ingest/v2/cli/utils/click.py +237 -0
  374. unstructured_ingest/v2/cli/utils/model_conversion.py +222 -0
  375. unstructured_ingest/v2/constants.py +2 -0
  376. unstructured_ingest/v2/errors.py +18 -0
  377. unstructured_ingest/v2/interfaces/__init__.py +32 -0
  378. unstructured_ingest/v2/interfaces/connector.py +50 -0
  379. unstructured_ingest/v2/interfaces/downloader.py +89 -0
  380. unstructured_ingest/v2/interfaces/file_data.py +116 -0
  381. unstructured_ingest/v2/interfaces/indexer.py +30 -0
  382. unstructured_ingest/v2/interfaces/process.py +19 -0
  383. unstructured_ingest/v2/interfaces/processor.py +88 -0
  384. unstructured_ingest/v2/interfaces/upload_stager.py +102 -0
  385. unstructured_ingest/v2/interfaces/uploader.py +53 -0
  386. unstructured_ingest/v2/logger.py +126 -0
  387. unstructured_ingest/v2/main.py +11 -0
  388. unstructured_ingest/v2/otel.py +111 -0
  389. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  390. unstructured_ingest/v2/pipeline/interfaces.py +211 -0
  391. unstructured_ingest/v2/pipeline/otel.py +32 -0
  392. unstructured_ingest/v2/pipeline/pipeline.py +384 -0
  393. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  394. unstructured_ingest/v2/pipeline/steps/chunk.py +80 -0
  395. unstructured_ingest/v2/pipeline/steps/download.py +207 -0
  396. unstructured_ingest/v2/pipeline/steps/embed.py +79 -0
  397. unstructured_ingest/v2/pipeline/steps/filter.py +35 -0
  398. unstructured_ingest/v2/pipeline/steps/index.py +86 -0
  399. unstructured_ingest/v2/pipeline/steps/partition.py +79 -0
  400. unstructured_ingest/v2/pipeline/steps/stage.py +65 -0
  401. unstructured_ingest/v2/pipeline/steps/uncompress.py +50 -0
  402. unstructured_ingest/v2/pipeline/steps/upload.py +58 -0
  403. unstructured_ingest/v2/processes/__init__.py +18 -0
  404. unstructured_ingest/v2/processes/chunker.py +124 -0
  405. unstructured_ingest/v2/processes/connector_registry.py +69 -0
  406. unstructured_ingest/v2/processes/connectors/__init__.py +117 -0
  407. unstructured_ingest/v2/processes/connectors/airtable.py +235 -0
  408. unstructured_ingest/v2/processes/connectors/astradb.py +402 -0
  409. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +276 -0
  410. unstructured_ingest/v2/processes/connectors/chroma.py +190 -0
  411. unstructured_ingest/v2/processes/connectors/confluence.py +207 -0
  412. unstructured_ingest/v2/processes/connectors/couchbase.py +334 -0
  413. unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
  414. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +208 -0
  415. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
  416. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
  417. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
  418. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
  419. unstructured_ingest/v2/processes/connectors/delta_table.py +191 -0
  420. unstructured_ingest/v2/processes/connectors/discord.py +158 -0
  421. unstructured_ingest/v2/processes/connectors/duckdb/__init__.py +15 -0
  422. unstructured_ingest/v2/processes/connectors/duckdb/base.py +100 -0
  423. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +127 -0
  424. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +126 -0
  425. unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
  426. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +470 -0
  427. unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +195 -0
  428. unstructured_ingest/v2/processes/connectors/fsspec/__init__.py +37 -0
  429. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +197 -0
  430. unstructured_ingest/v2/processes/connectors/fsspec/box.py +170 -0
  431. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +168 -0
  432. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +332 -0
  433. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +197 -0
  434. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +185 -0
  435. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +171 -0
  436. unstructured_ingest/v2/processes/connectors/fsspec/utils.py +17 -0
  437. unstructured_ingest/v2/processes/connectors/gitlab.py +268 -0
  438. unstructured_ingest/v2/processes/connectors/google_drive.py +348 -0
  439. unstructured_ingest/v2/processes/connectors/kafka/__init__.py +17 -0
  440. unstructured_ingest/v2/processes/connectors/kafka/cloud.py +121 -0
  441. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +273 -0
  442. unstructured_ingest/v2/processes/connectors/kafka/local.py +103 -0
  443. unstructured_ingest/v2/processes/connectors/kdbai.py +148 -0
  444. unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +30 -0
  445. unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
  446. unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
  447. unstructured_ingest/v2/processes/connectors/lancedb/cloud.py +42 -0
  448. unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
  449. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +169 -0
  450. unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
  451. unstructured_ingest/v2/processes/connectors/local.py +217 -0
  452. unstructured_ingest/v2/processes/connectors/milvus.py +225 -0
  453. unstructured_ingest/v2/processes/connectors/mongodb.py +361 -0
  454. unstructured_ingest/v2/processes/connectors/neo4j.py +385 -0
  455. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  456. unstructured_ingest/v2/processes/connectors/notion/client.py +349 -0
  457. unstructured_ingest/v2/processes/connectors/notion/connector.py +346 -0
  458. unstructured_ingest/v2/processes/connectors/notion/helpers.py +448 -0
  459. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +32 -0
  460. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  461. unstructured_ingest/v2/processes/connectors/notion/types/block.py +96 -0
  462. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +63 -0
  463. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bookmark.py +40 -0
  464. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
  465. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
  466. unstructured_ingest/v2/processes/connectors/notion/types/blocks/callout.py +94 -0
  467. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_database.py +23 -0
  468. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +23 -0
  469. unstructured_ingest/v2/processes/connectors/notion/types/blocks/code.py +43 -0
  470. unstructured_ingest/v2/processes/connectors/notion/types/blocks/column_list.py +35 -0
  471. unstructured_ingest/v2/processes/connectors/notion/types/blocks/divider.py +22 -0
  472. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +36 -0
  473. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +23 -0
  474. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +49 -0
  475. unstructured_ingest/v2/processes/connectors/notion/types/blocks/heading.py +37 -0
  476. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +21 -0
  477. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_preview.py +24 -0
  478. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
  479. unstructured_ingest/v2/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
  480. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +31 -0
  481. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +49 -0
  482. unstructured_ingest/v2/processes/connectors/notion/types/blocks/quote.py +37 -0
  483. unstructured_ingest/v2/processes/connectors/notion/types/blocks/synced_block.py +57 -0
  484. unstructured_ingest/v2/processes/connectors/notion/types/blocks/table.py +63 -0
  485. unstructured_ingest/v2/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
  486. unstructured_ingest/v2/processes/connectors/notion/types/blocks/template.py +30 -0
  487. unstructured_ingest/v2/processes/connectors/notion/types/blocks/todo.py +42 -0
  488. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +37 -0
  489. unstructured_ingest/v2/processes/connectors/notion/types/blocks/unsupported.py +20 -0
  490. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +22 -0
  491. unstructured_ingest/v2/processes/connectors/notion/types/database.py +73 -0
  492. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/__init__.py +106 -0
  493. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/checkbox.py +38 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +35 -0
  495. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +34 -0
  496. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/date.py +41 -0
  497. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +36 -0
  498. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +37 -0
  499. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/formula.py +49 -0
  500. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
  501. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_time.py +34 -0
  502. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/multiselect.py +73 -0
  503. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +49 -0
  504. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/people.py +41 -0
  505. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/phone_number.py +36 -0
  506. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/relation.py +67 -0
  507. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +43 -0
  508. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +56 -0
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/select.py +69 -0
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/status.py +81 -0
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +37 -0
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/unique_id.py +50 -0
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +37 -0
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +78 -0
  515. unstructured_ingest/v2/processes/connectors/notion/types/date.py +29 -0
  516. unstructured_ingest/v2/processes/connectors/notion/types/file.py +54 -0
  517. unstructured_ingest/v2/processes/connectors/notion/types/page.py +45 -0
  518. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +66 -0
  519. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +189 -0
  520. unstructured_ingest/v2/processes/connectors/notion/types/user.py +79 -0
  521. unstructured_ingest/v2/processes/connectors/onedrive.py +447 -0
  522. unstructured_ingest/v2/processes/connectors/outlook.py +239 -0
  523. unstructured_ingest/v2/processes/connectors/pinecone.py +277 -0
  524. unstructured_ingest/v2/processes/connectors/qdrant/__init__.py +16 -0
  525. unstructured_ingest/v2/processes/connectors/qdrant/cloud.py +59 -0
  526. unstructured_ingest/v2/processes/connectors/qdrant/local.py +58 -0
  527. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +160 -0
  528. unstructured_ingest/v2/processes/connectors/qdrant/server.py +60 -0
  529. unstructured_ingest/v2/processes/connectors/redisdb.py +182 -0
  530. unstructured_ingest/v2/processes/connectors/salesforce.py +303 -0
  531. unstructured_ingest/v2/processes/connectors/sharepoint.py +448 -0
  532. unstructured_ingest/v2/processes/connectors/slack.py +248 -0
  533. unstructured_ingest/v2/processes/connectors/sql/__init__.py +27 -0
  534. unstructured_ingest/v2/processes/connectors/sql/postgres.py +162 -0
  535. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +166 -0
  536. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +210 -0
  537. unstructured_ingest/v2/processes/connectors/sql/sql.py +434 -0
  538. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +168 -0
  539. unstructured_ingest/v2/processes/connectors/utils.py +29 -0
  540. unstructured_ingest/v2/processes/connectors/vectara.py +350 -0
  541. unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +22 -0
  542. unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +165 -0
  543. unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
  544. unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
  545. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +267 -0
  546. unstructured_ingest/v2/processes/embedder.py +195 -0
  547. unstructured_ingest/v2/processes/filter.py +60 -0
  548. unstructured_ingest/v2/processes/partitioner.py +188 -0
  549. unstructured_ingest/v2/processes/uncompress.py +61 -0
  550. unstructured_ingest/v2/unstructured_api.py +128 -0
  551. unstructured_ingest/v2/utils.py +61 -0
  552. unstructured_ingest-0.3.13.dist-info/LICENSE.md +201 -0
  553. unstructured_ingest-0.3.13.dist-info/METADATA +205 -0
  554. unstructured_ingest-0.3.13.dist-info/RECORD +557 -0
  555. unstructured_ingest-0.3.13.dist-info/WHEEL +5 -0
  556. unstructured_ingest-0.3.13.dist-info/entry_points.txt +2 -0
  557. unstructured_ingest-0.3.13.dist-info/top_level.txt +2 -0
@@ -0,0 +1,37 @@
1
+ # https://developers.notion.com/reference/property-object#files
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.connector.notion.types.file import FileObject
9
+
10
+
11
+ @dataclass
12
+ class Files(DBPropertyBase):
13
+ id: str
14
+ name: str
15
+ type: str = "files"
16
+ files: dict = field(default_factory=dict)
17
+
18
+ @classmethod
19
+ def from_dict(cls, data: dict):
20
+ return cls(**data)
21
+
22
+
23
+ @dataclass
24
+ class FilesCell(DBCellBase):
25
+ id: str
26
+ files: List[FileObject]
27
+ type: str = "files"
28
+ name: Optional[str] = None
29
+
30
+ @classmethod
31
+ def from_dict(cls, data: dict):
32
+ return cls(files=[FileObject.from_dict(f) for f in data.pop("files", [])], **data)
33
+
34
+ def get_html(self) -> Optional[HtmlTag]:
35
+ if not self.files:
36
+ return None
37
+ return Div([], [f.get_html() for f in self.files])
@@ -0,0 +1,49 @@
1
+ # https://developers.notion.com/reference/property-object#formula
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
+ @dataclass
15
+ class FormulaProp(FromJSONMixin):
16
+ expression: str
17
+
18
+ @classmethod
19
+ def from_dict(cls, data: dict):
20
+ return cls(**data)
21
+
22
+
23
+ @dataclass
24
+ class Formula(DBPropertyBase):
25
+ id: str
26
+ name: str
27
+ formula: FormulaProp
28
+ type: str = "formula"
29
+
30
+ @classmethod
31
+ def from_dict(cls, data: dict):
32
+ return cls(formula=FormulaProp.from_dict(data.pop("formula", {})), **data)
33
+
34
+
35
+ @dataclass
36
+ class FormulaCell(DBCellBase):
37
+ id: str
38
+ formula: dict
39
+ type: str = "formula"
40
+ name: Optional[str] = None
41
+
42
+ @classmethod
43
+ def from_dict(cls, data: dict):
44
+ return cls(**data)
45
+
46
+ def get_html(self) -> Optional[HtmlTag]:
47
+ formula = self.formula
48
+ t = formula.get("type")
49
+ return Div([], str(formula[t]))
@@ -0,0 +1,34 @@
1
+ # https://developers.notion.com/reference/property-object#last-edited-by
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.connector.notion.types.user import People
9
+
10
+
11
+ @dataclass
12
+ class LastEditedBy(DBPropertyBase):
13
+ @classmethod
14
+ def from_dict(cls, data: dict):
15
+ return cls()
16
+
17
+ def get_text(self) -> Optional[str]:
18
+ return None
19
+
20
+
21
+ @dataclass
22
+ class LastEditedByCell(DBCellBase):
23
+ id: str
24
+ last_edited_by: People
25
+ type: str = "last_edited_by"
26
+
27
+ name: Optional[str] = None
28
+
29
+ @classmethod
30
+ def from_dict(cls, data: dict):
31
+ return cls(last_edited_by=People.from_dict(data.pop("last_edited_by", {})), **data)
32
+
33
+ def get_html(self) -> Optional[HtmlTag]:
34
+ return self.last_edited_by.get_html()
@@ -0,0 +1,34 @@
1
+ # https://developers.notion.com/reference/property-object#last-edited-time
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
+ @dataclass
11
+ class LastEditedTime(DBPropertyBase):
12
+ id: str
13
+ name: str
14
+ type: str = "last_edited_time"
15
+ last_edited_time: dict = field(default_factory=dict)
16
+
17
+ @classmethod
18
+ def from_dict(cls, data: dict):
19
+ return cls(**data)
20
+
21
+
22
+ @dataclass
23
+ class LastEditedTimeCell(DBCellBase):
24
+ id: str
25
+ last_edited_time: str
26
+ type: str = "last_edited_time"
27
+ name: Optional[str] = None
28
+
29
+ @classmethod
30
+ def from_dict(cls, data: dict):
31
+ return cls(**data)
32
+
33
+ def get_html(self) -> Optional[HtmlTag]:
34
+ return Div([], self.last_edited_time)
@@ -0,0 +1,73 @@
1
+ # https://developers.notion.com/reference/property-object#multi-select
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag, Span
7
+
8
+ from unstructured_ingest.connector.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
+ @dataclass
16
+ class MultiSelectOption(FromJSONMixin):
17
+ color: str
18
+ id: str
19
+ name: str
20
+
21
+ @classmethod
22
+ def from_dict(cls, data: dict):
23
+ return cls(**data)
24
+
25
+
26
+ @dataclass
27
+ class MultiSelectProp(FromJSONMixin):
28
+ options: List[MultiSelectOption] = field(default_factory=list)
29
+
30
+ @classmethod
31
+ def from_dict(cls, data: dict):
32
+ return cls(options=[MultiSelectOption.from_dict(o) for o in data.get("options", [])])
33
+
34
+
35
+ @dataclass
36
+ class MultiSelect(DBPropertyBase):
37
+ id: str
38
+ name: str
39
+ multi_select: MultiSelectProp
40
+ type: str = "multi_select"
41
+
42
+ @classmethod
43
+ def from_dict(cls, data: dict):
44
+ return cls(
45
+ multi_select=data.pop("multi_select", {}),
46
+ **data,
47
+ )
48
+
49
+
50
+ @dataclass
51
+ class MultiSelectCell(DBCellBase):
52
+ id: str
53
+ multi_select: List[MultiSelectOption]
54
+ type: str = "multi_select"
55
+ name: Optional[str] = None
56
+
57
+ @classmethod
58
+ def from_dict(cls, data: dict):
59
+ return cls(
60
+ multi_select=[MultiSelectOption.from_dict(o) for o in data.pop("multi_select", [])],
61
+ **data,
62
+ )
63
+
64
+ def get_html(self) -> Optional[HtmlTag]:
65
+ if not self.multi_select:
66
+ return None
67
+ option_spans = []
68
+ for option in self.multi_select:
69
+ option_attributes = []
70
+ if option.color and option.color != "default":
71
+ option_attributes.append(Style(f"color: {option.color}"))
72
+ option_spans.append(Span(option_attributes, option.name))
73
+ return Div([], option_spans)
@@ -0,0 +1,49 @@
1
+ # https://developers.notion.com/reference/property-object#number
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
+ @dataclass
15
+ class NumberProp(FromJSONMixin):
16
+ format: str
17
+
18
+ @classmethod
19
+ def from_dict(cls, data: dict):
20
+ return cls(**data)
21
+
22
+
23
+ @dataclass
24
+ class Number(DBPropertyBase):
25
+ id: str
26
+ name: str
27
+ number: NumberProp
28
+ type: str = "number"
29
+
30
+ @classmethod
31
+ def from_dict(cls, data: dict):
32
+ return cls(number=NumberProp.from_dict(data.pop("number")), **data)
33
+
34
+
35
+ @dataclass
36
+ class NumberCell(DBCellBase):
37
+ id: str
38
+ number: Optional[int] = None
39
+ type: str = "number"
40
+ name: Optional[str] = None
41
+
42
+ @classmethod
43
+ def from_dict(cls, data: dict):
44
+ return cls(**data)
45
+
46
+ def get_html(self) -> Optional[HtmlTag]:
47
+ if number := self.number:
48
+ return Div([], str(number))
49
+ return None
@@ -0,0 +1,41 @@
1
+ # https://developers.notion.com/reference/property-object#people
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.connector.notion.types.user import People as PeopleType
9
+
10
+
11
+ @dataclass
12
+ class People(DBPropertyBase):
13
+ id: str
14
+ name: str
15
+ description: Optional[str] = None
16
+ type: str = "people"
17
+ people: dict = field(default_factory=dict)
18
+
19
+ @classmethod
20
+ def from_dict(cls, data: dict):
21
+ return cls(**data)
22
+
23
+
24
+ @dataclass
25
+ class PeopleCell(DBCellBase):
26
+ id: str
27
+ people: List[PeopleType]
28
+ type: str = "people"
29
+ name: Optional[str] = None
30
+
31
+ @classmethod
32
+ def from_dict(cls, data: dict):
33
+ return cls(people=[PeopleType.from_dict(p) for p in data.pop("people", {})], **data)
34
+
35
+ def get_html(self) -> Optional[HtmlTag]:
36
+ if not self.people:
37
+ return None
38
+ people_spans = []
39
+ for person in self.people:
40
+ people_spans.append(Span([], person.get_html()))
41
+ return Div([], people_spans)
@@ -0,0 +1,36 @@
1
+ # https://developers.notion.com/reference/property-object#phone-number
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
+ @dataclass
11
+ class PhoneNumber(DBPropertyBase):
12
+ id: str
13
+ name: str
14
+ type: str = "phone_number"
15
+ phone_number: dict = field(default_factory=dict)
16
+
17
+ @classmethod
18
+ def from_dict(cls, data: dict):
19
+ return cls(**data)
20
+
21
+
22
+ @dataclass
23
+ class PhoneNumberCell(DBCellBase):
24
+ id: str
25
+ phone_number: Optional[str]
26
+ name: Optional[str] = None
27
+ type: str = "phone_number"
28
+
29
+ @classmethod
30
+ def from_dict(cls, data: dict):
31
+ return cls(**data)
32
+
33
+ def get_html(self) -> Optional[HtmlTag]:
34
+ if phone_number := self.phone_number:
35
+ return Div([], phone_number)
36
+ return None
@@ -0,0 +1,67 @@
1
+ # https://developers.notion.com/reference/property-object#relation
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+ from urllib.parse import unquote
5
+
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.connector.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
+ @dataclass
16
+ class DualProperty(FromJSONMixin):
17
+ synced_property_id: str
18
+ synced_property_name: str
19
+
20
+ @classmethod
21
+ def from_dict(cls, data: dict):
22
+ return cls(**data)
23
+
24
+
25
+ @dataclass
26
+ class RelationProp(FromJSONMixin):
27
+ database_id: str
28
+ type: str
29
+ dual_property: DualProperty
30
+
31
+ @classmethod
32
+ def from_dict(cls, data: dict):
33
+ t = data.get("type")
34
+ if t == "dual_property":
35
+ dual_property = DualProperty.from_dict(data.pop(t))
36
+ else:
37
+ raise ValueError(f"{t} type not recognized")
38
+
39
+ return cls(dual_property=dual_property, **data)
40
+
41
+
42
+ @dataclass
43
+ class Relation(DBPropertyBase):
44
+ id: str
45
+ name: str
46
+ relation: RelationProp
47
+ type: str = "relation"
48
+
49
+ @classmethod
50
+ def from_dict(cls, data: dict):
51
+ return cls(relation=RelationProp.from_dict(data.pop("relation")), **data)
52
+
53
+
54
+ @dataclass
55
+ class RelationCell(DBCellBase):
56
+ id: str
57
+ has_more: bool
58
+ relation: list
59
+ type: str = "relation"
60
+ name: Optional[str] = None
61
+
62
+ @classmethod
63
+ def from_dict(cls, data: dict):
64
+ return cls(**data)
65
+
66
+ def get_html(self) -> Optional[HtmlTag]:
67
+ return Div([], unquote(self.id))
@@ -0,0 +1,43 @@
1
+ # https://developers.notion.com/reference/property-object#rich-text
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.connector.notion.types.rich_text import (
9
+ RichText as RichTextType,
10
+ )
11
+
12
+
13
+ @dataclass
14
+ class RichText(DBPropertyBase):
15
+ id: str
16
+ name: str
17
+ type: str = "rich_text"
18
+ rich_text: dict = field(default_factory=dict)
19
+
20
+ @classmethod
21
+ def from_dict(cls, data: dict):
22
+ return cls(**data)
23
+
24
+
25
+ @dataclass
26
+ class RichTextCell(DBCellBase):
27
+ id: str
28
+ rich_text: List[RichTextType]
29
+ name: Optional[str] = None
30
+ type: str = "rich_text"
31
+
32
+ @classmethod
33
+ def from_dict(cls, data: dict):
34
+ return cls(
35
+ rich_text=[RichTextType.from_dict(rt) for rt in data.pop("rich_text", [])],
36
+ **data,
37
+ )
38
+
39
+ def get_html(self) -> Optional[HtmlTag]:
40
+ if not self.rich_text:
41
+ return None
42
+ spans = [Span([], rt.get_html()) for rt in self.rich_text]
43
+ return Div([], spans)
@@ -0,0 +1,56 @@
1
+ # https://developers.notion.com/reference/property-object#rollup
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.connector.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
+ @dataclass
15
+ class RollupProp(FromJSONMixin):
16
+ function: str
17
+ relation_property_id: str
18
+ relation_property_name: str
19
+ rollup_property_id: str
20
+ rollup_property_name: str
21
+
22
+ @classmethod
23
+ def from_dict(cls, data: dict):
24
+ return cls(**data)
25
+
26
+
27
+ @dataclass
28
+ class Rollup(DBPropertyBase):
29
+ id: str
30
+ name: str
31
+ rollup: RollupProp
32
+ type: str = "rollup"
33
+
34
+ @classmethod
35
+ def from_dict(cls, data: dict):
36
+ return cls(rollup=RollupProp.from_dict(data.pop("rollup")), **data)
37
+
38
+
39
+ @dataclass
40
+ class RollupCell(DBCellBase):
41
+ id: str
42
+ rollup: dict
43
+ type: str = "rollup"
44
+ name: Optional[str] = None
45
+
46
+ @classmethod
47
+ def from_dict(cls, data: dict):
48
+ return cls(**data)
49
+
50
+ def get_html(self) -> Optional[HtmlTag]:
51
+ rollup = self.rollup
52
+ t = rollup.get("type")
53
+ v = rollup[t]
54
+ if isinstance(v, list):
55
+ return Div([], [Span([], str(x)) for x in v])
56
+ return Div([], str(v))
@@ -0,0 +1,69 @@
1
+ # https://developers.notion.com/reference/property-object#select
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.connector.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
+ @dataclass
16
+ class SelectOption(FromJSONMixin):
17
+ color: str
18
+ id: str
19
+ name: str
20
+ description: Optional[str] = None
21
+
22
+ @classmethod
23
+ def from_dict(cls, data: dict):
24
+ return cls(**data)
25
+
26
+
27
+ @dataclass
28
+ class SelectProp(FromJSONMixin):
29
+ options: List[SelectOption] = field(default_factory=list)
30
+
31
+ @classmethod
32
+ def from_dict(cls, data: dict):
33
+ return cls(options=[SelectOption.from_dict(o) for o in data.get("options", [])])
34
+
35
+
36
+ @dataclass
37
+ class Select(DBPropertyBase):
38
+ id: str
39
+ name: str
40
+ select: SelectProp
41
+ type: str = "select"
42
+
43
+ @classmethod
44
+ def from_dict(cls, data: dict):
45
+ return cls(select=SelectProp.from_dict(data.pop("select", {})), **data)
46
+
47
+
48
+ @dataclass
49
+ class SelectCell(DBCellBase):
50
+ id: str
51
+ select: Optional[SelectOption]
52
+ type: str = "select"
53
+ name: Optional[str] = None
54
+
55
+ @classmethod
56
+ def from_dict(cls, data: dict):
57
+ select_data = data.pop("select")
58
+ select = None
59
+ if select_data:
60
+ select = SelectOption.from_dict(select_data)
61
+ return cls(select=select, **data)
62
+
63
+ def get_html(self) -> Optional[HtmlTag]:
64
+ if select := self.select:
65
+ select_attr = []
66
+ if select.color and select.color != "default":
67
+ select_attr.append(Style(f"color: {select.color}"))
68
+ return Div(select_attr, select.name)
69
+ return None
@@ -0,0 +1,81 @@
1
+ # https://developers.notion.com/reference/property-object#status
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.connector.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
+ @dataclass
16
+ class StatusOption(FromJSONMixin):
17
+ color: str
18
+ id: str
19
+ name: str
20
+ description: Optional[str] = None
21
+
22
+ @classmethod
23
+ def from_dict(cls, data: dict):
24
+ return cls(**data)
25
+
26
+
27
+ @dataclass
28
+ class StatusGroup(FromJSONMixin):
29
+ color: str
30
+ id: str
31
+ name: str
32
+ option_ids: List[str] = field(default_factory=List[str])
33
+
34
+ @classmethod
35
+ def from_dict(cls, data: dict):
36
+ return cls(**data)
37
+
38
+
39
+ @dataclass
40
+ class StatusProp(FromJSONMixin):
41
+ options: List[StatusOption] = field(default_factory=list)
42
+ groups: List[StatusGroup] = field(default_factory=list)
43
+
44
+ @classmethod
45
+ def from_dict(cls, data: dict):
46
+ return cls(
47
+ options=[StatusOption.from_dict(o) for o in data.get("options", [])],
48
+ groups=[StatusGroup.from_dict(g) for g in data.get("groups", [])],
49
+ )
50
+
51
+
52
+ @dataclass
53
+ class Status(DBPropertyBase):
54
+ id: str
55
+ name: str
56
+ status: StatusProp
57
+ type: str = "status"
58
+
59
+ @classmethod
60
+ def from_dict(cls, data: dict):
61
+ return cls(status=StatusProp.from_dict(data.pop("status", {})), **data)
62
+
63
+
64
+ @dataclass
65
+ class StatusCell(DBCellBase):
66
+ id: str
67
+ status: Optional[StatusOption]
68
+ type: str = "status"
69
+ name: Optional[str] = None
70
+
71
+ @classmethod
72
+ def from_dict(cls, data: dict):
73
+ return cls(status=StatusOption.from_dict(data.pop("status", {})), **data)
74
+
75
+ def get_html(self) -> Optional[HtmlTag]:
76
+ if status := self.status:
77
+ select_attr = []
78
+ if status.color and status.color != "default":
79
+ select_attr.append(Style(f"color: {status.color}"))
80
+ return Div(select_attr, status.name)
81
+ return None