unstructured-ingest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568):
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +1 -1
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +11 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +9 -9
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/{v2/processes → processes}/connectors/github.py +10 -10
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +55 -27
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +8 -8
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +7 -7
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/METADATA +98 -97
  279. unstructured_ingest-0.7.1.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.4.dist-info/RECORD +0 -591
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.4.dist-info → unstructured_ingest-0.7.1.dist-info}/entry_points.txt +0 -0
@@ -1,31 +0,0 @@
1
- # https://developers.notion.com/reference/block#bulleted-list-item
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.tags import HtmlTag, Li
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase
8
- from unstructured_ingest.connector.notion.types.rich_text import RichText
9
-
10
-
11
- @dataclass
12
- class BulletedListItem(BlockBase):
13
- color: str
14
- children: List[dict] = field(default_factory=list)
15
- rich_text: List[RichText] = field(default_factory=list)
16
-
17
- @staticmethod
18
- def can_have_children() -> bool:
19
- return True
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- rich_text = data.pop("rich_text", [])
24
- return cls(
25
- color=data["color"],
26
- children=data.get("children", []),
27
- rich_text=[RichText.from_dict(rt) for rt in rich_text],
28
- )
29
-
30
- def get_html(self) -> Optional[HtmlTag]:
31
- return Li([], [rt.get_html() for rt in self.rich_text])
@@ -1,23 +0,0 @@
1
- # https://developers.notion.com/reference/block#child-page
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import HtmlTag, P
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase, GetHTMLMixin
8
-
9
-
10
- @dataclass
11
- class ChildPage(BlockBase, GetHTMLMixin):
12
- title: str
13
-
14
- @staticmethod
15
- def can_have_children() -> bool:
16
- return True
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
- def get_html(self) -> Optional[HtmlTag]:
23
- return P([], self.title)
@@ -1,36 +0,0 @@
1
- # https://developers.notion.com/reference/block#embed
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Href
6
- from htmlBuilder.tags import A, Br, Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import BlockBase
9
- from unstructured_ingest.connector.notion.types.rich_text import RichText
10
-
11
-
12
- @dataclass
13
- class Embed(BlockBase):
14
- url: str
15
- caption: List[RichText] = field(default_factory=list)
16
-
17
- @staticmethod
18
- def can_have_children() -> bool:
19
- return False
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- return cls(caption=[RichText.from_dict(d) for d in data.pop("caption", [])], **data)
24
-
25
- def get_html(self) -> Optional[HtmlTag]:
26
- texts = []
27
- if self.url:
28
- texts.append(A([Href(self.url)], self.url))
29
- if self.caption:
30
- texts.append(Div([], [rt.get_html() for rt in self.caption]))
31
- if not texts:
32
- return None
33
- joined = [Br()] * (len(texts) * 2 - 1)
34
- joined[0::2] = texts
35
-
36
- return Div([], joined)
@@ -1,23 +0,0 @@
1
- # https://developers.notion.com/reference/block#equation
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase
8
-
9
-
10
- @dataclass
11
- class Equation(BlockBase):
12
- expression: str
13
-
14
- @staticmethod
15
- def can_have_children() -> bool:
16
- return False
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
- def get_html(self) -> Optional[HtmlTag]:
23
- return Div([], self.expression)
@@ -1,49 +0,0 @@
1
- # https://developers.notion.com/reference/block#file
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Href
6
- from htmlBuilder.tags import A, Br, Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import BlockBase
9
- from unstructured_ingest.connector.notion.types.file import External
10
- from unstructured_ingest.connector.notion.types.file import File as FileContent
11
- from unstructured_ingest.connector.notion.types.rich_text import RichText
12
-
13
-
14
- @dataclass
15
- class File(BlockBase):
16
- type: str
17
- external: Optional[External] = None
18
- file: Optional[FileContent] = None
19
- caption: List[RichText] = field(default_factory=list)
20
-
21
- @staticmethod
22
- def can_have_children() -> bool:
23
- return False
24
-
25
- @classmethod
26
- def from_dict(cls, data: dict):
27
- caption = [RichText.from_dict(rt) for rt in data.pop("caption", [])]
28
- t = data["type"]
29
- file = cls(type=t, caption=caption)
30
- if t == "external":
31
- file.external = External.from_dict(data["external"])
32
- elif t == "file":
33
- file.file = FileContent.from_dict(data["file"])
34
- return file
35
-
36
- def get_html(self) -> Optional[HtmlTag]:
37
- texts = []
38
- if self.file:
39
- texts.append(A([Href(self.file.url)], self.file.url))
40
- if self.external:
41
- texts.append(A([Href(self.external.url)], self.external.url))
42
- if self.caption:
43
- texts.append(Div([], [rt.get_html() for rt in self.caption]))
44
- if not texts:
45
- return None
46
- joined = [Br()] * (len(texts) * 2 - 1)
47
- joined[0::2] = texts
48
-
49
- return Div([], joined)
@@ -1,21 +0,0 @@
1
- # https://developers.notion.com/reference/block#image
2
- from typing import Optional
3
-
4
- from htmlBuilder.attributes import Src
5
- from htmlBuilder.tags import HtmlTag, Img
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase
8
- from unstructured_ingest.connector.notion.types.file import FileObject
9
-
10
-
11
- class Image(BlockBase, FileObject):
12
- @staticmethod
13
- def can_have_children() -> bool:
14
- return False
15
-
16
- def get_html(self) -> Optional[HtmlTag]:
17
- if self.external:
18
- return Img([Src(self.external.url)], [])
19
- if self.file:
20
- return Img([Src(self.file.url)], [])
21
- return None
@@ -1,29 +0,0 @@
1
- # https://developers.notion.com/reference/block#link-to-page
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase
8
-
9
-
10
- @dataclass
11
- class LinkToPage(BlockBase):
12
- type: str
13
- page_id: Optional[str] = None
14
- database_id: Optional[str] = None
15
-
16
- @staticmethod
17
- def can_have_children() -> bool:
18
- return False
19
-
20
- @classmethod
21
- def from_dict(cls, data: dict):
22
- return cls(**data)
23
-
24
- def get_html(self) -> Optional[HtmlTag]:
25
- if page_id := self.page_id:
26
- return Div([], page_id)
27
- if database_id := self.database_id:
28
- return Div([], database_id)
29
- return None
@@ -1,31 +0,0 @@
1
- # https://developers.notion.com/reference/block#paragraph
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.tags import Br, Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import BlockBase
8
- from unstructured_ingest.connector.notion.types.rich_text import RichText
9
-
10
-
11
- @dataclass
12
- class Paragraph(BlockBase):
13
- color: str
14
- children: List[dict] = field(default_factory=list)
15
- rich_text: List[RichText] = field(default_factory=list)
16
-
17
- @staticmethod
18
- def can_have_children() -> bool:
19
- return True
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- rich_text = data.pop("rich_text", [])
24
- paragraph = cls(**data)
25
- paragraph.rich_text = [RichText.from_dict(rt) for rt in rich_text]
26
- return paragraph
27
-
28
- def get_html(self) -> Optional[HtmlTag]:
29
- if not self.rich_text:
30
- return Br()
31
- return Div([], [rt.get_html() for rt in self.rich_text])
@@ -1,49 +0,0 @@
1
- # https://developers.notion.com/reference/block#pdf
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Href
6
- from htmlBuilder.tags import A, Br, Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import BlockBase
9
- from unstructured_ingest.connector.notion.types.file import External, File
10
- from unstructured_ingest.connector.notion.types.rich_text import RichText
11
-
12
-
13
- @dataclass
14
- class PDF(BlockBase):
15
- type: str
16
- caption: List[RichText] = field(default_factory=list)
17
- external: Optional[External] = None
18
- file: Optional[File] = None
19
-
20
- @staticmethod
21
- def can_have_children() -> bool:
22
- return False
23
-
24
- @classmethod
25
- def from_dict(cls, data: dict):
26
- caption = data.pop("caption", [])
27
- t = data["type"]
28
- paragraph = cls(type=t)
29
- paragraph.caption = [RichText.from_dict(c) for c in caption]
30
- if t == "external":
31
- paragraph.external = External.from_dict(data["external"])
32
- elif t == "file":
33
- paragraph.file = File.from_dict(data["file"])
34
- return paragraph
35
-
36
- def get_html(self) -> Optional[HtmlTag]:
37
- texts = []
38
- if self.external:
39
- texts.append(A([Href(self.external.url)], self.external.url))
40
- if self.file:
41
- texts.append(A([Href(self.file.url)], self.file.url))
42
- if self.caption:
43
- texts.append(Div([], [rt.get_html() for rt in self.caption]))
44
- if not texts:
45
- return None
46
- joined = [Br()] * (len(texts) * 2 - 1)
47
- joined[0::2] = texts
48
-
49
- return Div([], joined)
@@ -1,37 +0,0 @@
1
- # https://developers.notion.com/reference/block#toggle-blocks
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Style
6
- from htmlBuilder.tags import Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import BlockBase
9
- from unstructured_ingest.connector.notion.types.rich_text import RichText
10
-
11
-
12
- @dataclass
13
- class Toggle(BlockBase):
14
- color: str
15
- children: List[dict] = field(default_factory=list)
16
- rich_text: List[RichText] = field(default_factory=list)
17
-
18
- @staticmethod
19
- def can_have_children() -> bool:
20
- return True
21
-
22
- @classmethod
23
- def from_dict(cls, data: dict):
24
- rich_text = data.pop("rich_text", [])
25
- toggle = cls(**data)
26
- toggle.rich_text = [RichText.from_dict(rt) for rt in rich_text]
27
- return toggle
28
-
29
- def get_html(self) -> Optional[HtmlTag]:
30
- if not self.rich_text:
31
- return None
32
-
33
- texts = [rt.get_html() for rt in self.rich_text]
34
- attributes = []
35
- if self.color and self.color != "default":
36
- attributes.append(Style(f"color: {self.color}"))
37
- return Div(attributes, texts)
@@ -1,22 +0,0 @@
1
- # https://developers.notion.com/reference/block#image
2
- from typing import Optional
3
-
4
- from htmlBuilder.attributes import Src
5
- from htmlBuilder.tags import HtmlTag, Source
6
- from htmlBuilder.tags import Video as VideoHtml
7
-
8
- from unstructured_ingest.connector.notion.interfaces import BlockBase
9
- from unstructured_ingest.connector.notion.types.file import FileObject
10
-
11
-
12
- class Video(BlockBase, FileObject):
13
- @staticmethod
14
- def can_have_children() -> bool:
15
- return False
16
-
17
- def get_html(self) -> Optional[HtmlTag]:
18
- if self.external:
19
- return VideoHtml([], [Source([Src(self.external.url)], [self.external.url])])
20
- if self.file:
21
- return VideoHtml([], [Source([Src(self.file.url)], [self.file.url])])
22
- return None
@@ -1,73 +0,0 @@
1
- # https://developers.notion.com/reference/database
2
- from dataclasses import dataclass, field
3
- from typing import Dict, List, Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag, Span
6
-
7
- from unstructured_ingest.v2.processes.connectors.notion.interfaces import (
8
- DBPropertyBase,
9
- FromJSONMixin,
10
- GetHTMLMixin,
11
- )
12
- from unstructured_ingest.v2.processes.connectors.notion.types.database_properties import (
13
- map_properties,
14
- )
15
- from unstructured_ingest.v2.processes.connectors.notion.types.file import FileObject
16
- from unstructured_ingest.v2.processes.connectors.notion.types.parent import Parent
17
- from unstructured_ingest.v2.processes.connectors.notion.types.rich_text import RichText
18
- from unstructured_ingest.v2.processes.connectors.notion.types.user import PartialUser
19
-
20
-
21
- @dataclass
22
- class Database(FromJSONMixin, GetHTMLMixin):
23
- id: str
24
- created_time: str
25
- created_by: PartialUser
26
- last_edited_time: str
27
- last_edited_by: PartialUser
28
- archived: bool
29
- in_trash: bool
30
- parent: Parent
31
- url: str
32
- is_inline: bool
33
- public_url: str
34
- request_id: Optional[str] = None
35
- properties: Dict[str, DBPropertyBase] = field(default_factory=dict)
36
- title: List[RichText] = field(default_factory=list)
37
- description: List[RichText] = field(default_factory=list)
38
- icon: Optional[FileObject] = None
39
- cover: Optional[FileObject] = None
40
- object: str = "database"
41
-
42
- @classmethod
43
- def from_dict(cls, data: dict):
44
- created_by = data.pop("created_by")
45
- last_edited_by = data.pop("last_edited_by")
46
- icon = data.pop("icon")
47
- cover = data.pop("cover")
48
- parent = data.pop("parent")
49
- title = data.pop("title")
50
- description = data.pop("description")
51
- page = cls(
52
- properties=map_properties(data.pop("properties", {})),
53
- created_by=PartialUser.from_dict(created_by),
54
- last_edited_by=PartialUser.from_dict(last_edited_by),
55
- icon=FileObject.from_dict(icon) if icon else None,
56
- cover=FileObject.from_dict(cover) if cover else None,
57
- parent=Parent.from_dict(parent),
58
- title=[RichText.from_dict(data=r) for r in title],
59
- description=[RichText.from_dict(data=r) for r in description],
60
- **data,
61
- )
62
-
63
- return page
64
-
65
- def get_html(self) -> Optional[HtmlTag]:
66
- spans = []
67
- if title := self.title:
68
- spans.append(Span([], [rt.get_html() for rt in title]))
69
- if description := self.description:
70
- spans.append(Span([], [rt.get_html() for rt in description]))
71
- if spans:
72
- return Div([], spans)
73
- return None
@@ -1,35 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#created-by
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
- from unstructured_ingest.connector.notion.types.user import People
9
-
10
-
11
- @dataclass
12
- class CreatedBy(DBPropertyBase):
13
- id: str
14
- name: str
15
- type: str = "created_by"
16
- created_by: dict = field(default_factory=dict)
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class CreatedByCell(DBCellBase):
25
- id: str
26
- created_by: People
27
- type: str = "created_by"
28
- name: Optional[str] = None
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(created_by=People.from_dict(data.pop("created_by")), **data)
33
-
34
- def get_html(self) -> Optional[HtmlTag]:
35
- return self.created_by.get_html()
@@ -1,34 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#created-time
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
-
9
-
10
- @dataclass
11
- class CreatedTime(DBPropertyBase):
12
- id: str
13
- name: str
14
- type: str = "created_time"
15
- created_time: dict = field(default_factory=dict)
16
-
17
- @classmethod
18
- def from_dict(cls, data: dict):
19
- return cls(**data)
20
-
21
-
22
- @dataclass
23
- class CreatedTimeCell(DBCellBase):
24
- id: str
25
- created_time: str
26
- type: str = "created_time"
27
- name: Optional[str] = None
28
-
29
- @classmethod
30
- def from_dict(cls, data: dict):
31
- return cls(**data)
32
-
33
- def get_html(self) -> Optional[HtmlTag]:
34
- return Div([], self.created_time)
@@ -1,36 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#email
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
-
9
-
10
- @dataclass
11
- class Email(DBPropertyBase):
12
- id: str
13
- name: str
14
- type: str = "email"
15
- email: dict = field(default_factory=dict)
16
-
17
- @classmethod
18
- def from_dict(cls, data: dict):
19
- return cls(**data)
20
-
21
-
22
- @dataclass
23
- class EmailCell(DBCellBase):
24
- id: str
25
- email: str
26
- name: Optional[str] = None
27
- type: str = "email"
28
-
29
- @classmethod
30
- def from_dict(cls, data: dict):
31
- return cls(**data)
32
-
33
- def get_html(self) -> Optional[HtmlTag]:
34
- if email := self.email:
35
- return Div([], email)
36
- return None
@@ -1,37 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#files
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
- from unstructured_ingest.connector.notion.types.file import FileObject
9
-
10
-
11
- @dataclass
12
- class Files(DBPropertyBase):
13
- id: str
14
- name: str
15
- type: str = "files"
16
- files: dict = field(default_factory=dict)
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class FilesCell(DBCellBase):
25
- id: str
26
- files: List[FileObject]
27
- type: str = "files"
28
- name: Optional[str] = None
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(files=[FileObject.from_dict(f) for f in data.pop("files", [])], **data)
33
-
34
- def get_html(self) -> Optional[HtmlTag]:
35
- if not self.files:
36
- return None
37
- return Div([], [f.get_html() for f in self.files])
@@ -1,34 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#last-edited-by
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
- from unstructured_ingest.connector.notion.types.user import People
9
-
10
-
11
- @dataclass
12
- class LastEditedBy(DBPropertyBase):
13
- @classmethod
14
- def from_dict(cls, data: dict):
15
- return cls()
16
-
17
- def get_text(self) -> Optional[str]:
18
- return None
19
-
20
-
21
- @dataclass
22
- class LastEditedByCell(DBCellBase):
23
- id: str
24
- last_edited_by: People
25
- type: str = "last_edited_by"
26
-
27
- name: Optional[str] = None
28
-
29
- @classmethod
30
- def from_dict(cls, data: dict):
31
- return cls(last_edited_by=People.from_dict(data.pop("last_edited_by", {})), **data)
32
-
33
- def get_html(self) -> Optional[HtmlTag]:
34
- return self.last_edited_by.get_html()
@@ -1,49 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#number
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import (
8
- DBCellBase,
9
- DBPropertyBase,
10
- FromJSONMixin,
11
- )
12
-
13
-
14
- @dataclass
15
- class NumberProp(FromJSONMixin):
16
- format: str
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class Number(DBPropertyBase):
25
- id: str
26
- name: str
27
- number: NumberProp
28
- type: str = "number"
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(number=NumberProp.from_dict(data.pop("number")), **data)
33
-
34
-
35
- @dataclass
36
- class NumberCell(DBCellBase):
37
- id: str
38
- number: Optional[int] = None
39
- type: str = "number"
40
- name: Optional[str] = None
41
-
42
- @classmethod
43
- def from_dict(cls, data: dict):
44
- return cls(**data)
45
-
46
- def get_html(self) -> Optional[HtmlTag]:
47
- if number := self.number:
48
- return Div([], str(number))
49
- return None