unstructured-ingest 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (568)
  1. examples/airtable.py +44 -0
  2. examples/azure_cognitive_search.py +55 -0
  3. examples/chroma.py +54 -0
  4. examples/couchbase.py +55 -0
  5. examples/databricks_volumes_dest.py +55 -0
  6. examples/databricks_volumes_source.py +53 -0
  7. examples/delta_table.py +45 -0
  8. examples/discord_example.py +36 -0
  9. examples/elasticsearch.py +49 -0
  10. examples/google_drive.py +45 -0
  11. examples/kdbai.py +54 -0
  12. examples/local.py +36 -0
  13. examples/milvus.py +44 -0
  14. examples/mongodb.py +53 -0
  15. examples/opensearch.py +50 -0
  16. examples/pinecone.py +57 -0
  17. examples/s3.py +38 -0
  18. examples/salesforce.py +44 -0
  19. examples/sharepoint.py +47 -0
  20. examples/singlestore.py +49 -0
  21. examples/sql.py +90 -0
  22. examples/vectara.py +54 -0
  23. examples/weaviate.py +44 -0
  24. test/integration/chunkers/test_chunkers.py +1 -1
  25. test/integration/connectors/conftest.py +1 -1
  26. test/integration/connectors/databricks/test_volumes_native.py +3 -3
  27. test/integration/connectors/discord/test_discord.py +1 -1
  28. test/integration/connectors/duckdb/test_duckdb.py +2 -2
  29. test/integration/connectors/duckdb/test_motherduck.py +2 -2
  30. test/integration/connectors/elasticsearch/test_elasticsearch.py +2 -2
  31. test/integration/connectors/elasticsearch/test_opensearch.py +2 -2
  32. test/integration/connectors/sql/test_databricks_delta_tables.py +3 -3
  33. test/integration/connectors/sql/test_postgres.py +2 -2
  34. test/integration/connectors/sql/test_singlestore.py +2 -2
  35. test/integration/connectors/sql/test_snowflake.py +2 -2
  36. test/integration/connectors/sql/test_sqlite.py +2 -2
  37. test/integration/connectors/sql/test_vastdb.py +1 -1
  38. test/integration/connectors/test_astradb.py +2 -2
  39. test/integration/connectors/test_azure_ai_search.py +2 -2
  40. test/integration/connectors/test_chroma.py +2 -2
  41. test/integration/connectors/test_confluence.py +1 -1
  42. test/integration/connectors/test_delta_table.py +2 -2
  43. test/integration/connectors/test_dropbox.py +2 -2
  44. test/integration/connectors/test_github.py +49 -0
  45. test/integration/connectors/test_google_drive.py +2 -2
  46. test/integration/connectors/test_jira.py +1 -1
  47. test/integration/connectors/test_lancedb.py +7 -7
  48. test/integration/connectors/test_milvus.py +2 -2
  49. test/integration/connectors/test_mongodb.py +2 -2
  50. test/integration/connectors/test_neo4j.py +7 -7
  51. test/integration/connectors/test_notion.py +2 -2
  52. test/integration/connectors/test_onedrive.py +2 -2
  53. test/integration/connectors/test_pinecone.py +3 -3
  54. test/integration/connectors/test_qdrant.py +6 -6
  55. test/integration/connectors/test_redis.py +3 -3
  56. test/integration/connectors/test_s3.py +3 -3
  57. test/integration/connectors/test_sharepoint.py +1 -1
  58. test/integration/connectors/test_vectara.py +4 -4
  59. test/integration/connectors/test_zendesk.py +2 -2
  60. test/integration/connectors/utils/validation/destination.py +2 -2
  61. test/integration/connectors/utils/validation/source.py +2 -2
  62. test/integration/connectors/weaviate/test_cloud.py +1 -1
  63. test/integration/connectors/weaviate/test_local.py +2 -2
  64. test/integration/embedders/test_azure_openai.py +1 -1
  65. test/integration/embedders/test_bedrock.py +2 -2
  66. test/integration/embedders/test_huggingface.py +1 -1
  67. test/integration/embedders/test_mixedbread.py +1 -1
  68. test/integration/embedders/test_octoai.py +2 -2
  69. test/integration/embedders/test_openai.py +2 -2
  70. test/integration/embedders/test_togetherai.py +2 -2
  71. test/integration/embedders/test_vertexai.py +1 -1
  72. test/integration/embedders/test_voyageai.py +1 -1
  73. test/integration/partitioners/test_partitioner.py +2 -2
  74. test/unit/{v2/chunkers → chunkers}/test_chunkers.py +1 -1
  75. test/unit/{v2/connectors → connectors}/ibm_watsonx/test_ibm_watsonx_s3.py +6 -6
  76. test/unit/{v2/connectors → connectors}/motherduck/test_base.py +5 -5
  77. test/unit/{v2/connectors → connectors}/sql/test_sql.py +4 -4
  78. test/unit/{v2/connectors → connectors}/test_confluence.py +1 -1
  79. test/unit/{v2/connectors → connectors}/test_jira.py +1 -1
  80. test/unit/{v2/embedders → embedders}/test_huggingface.py +1 -1
  81. test/unit/{v2/embedders → embedders}/test_vertexai.py +1 -1
  82. test/unit/{v2/partitioners → partitioners}/test_partitioner.py +2 -2
  83. test/unit/test_html.py +1 -1
  84. test/unit/{v2/test_interfaces.py → test_interfaces.py} +1 -1
  85. test/unit/test_utils.py +106 -97
  86. unstructured_ingest/__version__.py +1 -1
  87. unstructured_ingest/cli/__init__.py +0 -14
  88. unstructured_ingest/cli/base/__init__.py +4 -0
  89. unstructured_ingest/cli/base/cmd.py +259 -9
  90. unstructured_ingest/cli/base/dest.py +58 -61
  91. unstructured_ingest/cli/base/src.py +54 -36
  92. unstructured_ingest/cli/cli.py +4 -17
  93. unstructured_ingest/{v2/cli → cli}/cmds.py +2 -2
  94. unstructured_ingest/{v2/cli → cli}/utils/model_conversion.py +6 -6
  95. unstructured_ingest/{v2/types → data_types}/file_data.py +1 -1
  96. unstructured_ingest/embed/bedrock.py +3 -3
  97. unstructured_ingest/embed/octoai.py +3 -3
  98. unstructured_ingest/embed/openai.py +3 -3
  99. unstructured_ingest/embed/togetherai.py +4 -4
  100. unstructured_ingest/embed/vertexai.py +1 -1
  101. unstructured_ingest/embed/voyageai.py +4 -4
  102. unstructured_ingest/{v2/interfaces → interfaces}/downloader.py +3 -3
  103. unstructured_ingest/{v2/interfaces → interfaces}/indexer.py +3 -3
  104. unstructured_ingest/{v2/interfaces → interfaces}/upload_stager.py +2 -2
  105. unstructured_ingest/{v2/interfaces → interfaces}/uploader.py +2 -2
  106. unstructured_ingest/{v2/otel.py → otel.py} +1 -1
  107. unstructured_ingest/pipeline/__init__.py +0 -22
  108. unstructured_ingest/pipeline/interfaces.py +179 -238
  109. unstructured_ingest/{v2/pipeline → pipeline}/otel.py +2 -2
  110. unstructured_ingest/pipeline/pipeline.py +388 -97
  111. unstructured_ingest/{v2/pipeline → pipeline}/steps/chunk.py +5 -5
  112. unstructured_ingest/{v2/pipeline → pipeline}/steps/download.py +5 -5
  113. unstructured_ingest/{v2/pipeline → pipeline}/steps/embed.py +5 -5
  114. unstructured_ingest/{v2/pipeline → pipeline}/steps/filter.py +4 -4
  115. unstructured_ingest/{v2/pipeline → pipeline}/steps/index.py +5 -5
  116. unstructured_ingest/{v2/pipeline → pipeline}/steps/partition.py +5 -5
  117. unstructured_ingest/{v2/pipeline → pipeline}/steps/stage.py +5 -5
  118. unstructured_ingest/{v2/pipeline → pipeline}/steps/uncompress.py +4 -4
  119. unstructured_ingest/{v2/pipeline → pipeline}/steps/upload.py +5 -5
  120. unstructured_ingest/{v2/processes → processes}/chunker.py +3 -3
  121. unstructured_ingest/{v2/processes → processes}/connector_registry.py +1 -1
  122. unstructured_ingest/{v2/processes → processes}/connectors/__init__.py +14 -11
  123. unstructured_ingest/{v2/processes → processes}/connectors/airtable.py +4 -4
  124. unstructured_ingest/{v2/processes → processes}/connectors/astradb.py +15 -15
  125. unstructured_ingest/{v2/processes → processes}/connectors/azure_ai_search.py +8 -9
  126. unstructured_ingest/{v2/processes → processes}/connectors/chroma.py +10 -7
  127. unstructured_ingest/{v2/processes → processes}/connectors/confluence.py +11 -11
  128. unstructured_ingest/{v2/processes → processes}/connectors/couchbase.py +12 -12
  129. unstructured_ingest/{v2/processes → processes}/connectors/databricks/__init__.py +1 -1
  130. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes.py +12 -11
  131. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_aws.py +3 -3
  132. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_azure.py +3 -3
  133. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_gcp.py +3 -3
  134. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_native.py +3 -3
  135. unstructured_ingest/{v2/processes → processes}/connectors/databricks/volumes_table.py +8 -9
  136. unstructured_ingest/{v2/processes → processes}/connectors/delta_table.py +7 -7
  137. unstructured_ingest/{v2/processes → processes}/connectors/discord.py +9 -9
  138. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/__init__.py +1 -1
  139. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/base.py +3 -4
  140. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/duckdb.py +7 -7
  141. unstructured_ingest/{v2/processes → processes}/connectors/duckdb/motherduck.py +7 -7
  142. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/__init__.py +1 -1
  143. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/elasticsearch.py +17 -17
  144. unstructured_ingest/{v2/processes → processes}/connectors/elasticsearch/opensearch.py +5 -5
  145. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/__init__.py +1 -1
  146. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/azure.py +8 -8
  147. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/box.py +8 -8
  148. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/dropbox.py +8 -8
  149. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/fsspec.py +8 -8
  150. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/gcs.py +8 -8
  151. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/s3.py +9 -9
  152. unstructured_ingest/{v2/processes → processes}/connectors/fsspec/sftp.py +5 -5
  153. unstructured_ingest/processes/connectors/github.py +221 -0
  154. unstructured_ingest/{v2/processes → processes}/connectors/gitlab.py +9 -9
  155. unstructured_ingest/{v2/processes → processes}/connectors/google_drive.py +22 -13
  156. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/__init__.py +1 -1
  157. unstructured_ingest/{v2/processes → processes}/connectors/ibm_watsonx/ibm_watsonx_s3.py +9 -9
  158. unstructured_ingest/{v2/processes → processes}/connectors/jira.py +9 -9
  159. unstructured_ingest/{v2/processes → processes}/connectors/kafka/__init__.py +1 -1
  160. unstructured_ingest/{v2/processes → processes}/connectors/kafka/cloud.py +3 -3
  161. unstructured_ingest/{v2/processes → processes}/connectors/kafka/kafka.py +9 -9
  162. unstructured_ingest/{v2/processes → processes}/connectors/kafka/local.py +2 -2
  163. unstructured_ingest/{v2/processes → processes}/connectors/kdbai.py +11 -7
  164. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/__init__.py +1 -1
  165. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/aws.py +3 -3
  166. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/azure.py +3 -3
  167. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/cloud.py +3 -3
  168. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/gcp.py +3 -3
  169. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/lancedb.py +6 -6
  170. unstructured_ingest/{v2/processes → processes}/connectors/lancedb/local.py +3 -3
  171. unstructured_ingest/{v2/processes → processes}/connectors/local.py +9 -9
  172. unstructured_ingest/{v2/processes → processes}/connectors/milvus.py +7 -7
  173. unstructured_ingest/{v2/processes → processes}/connectors/mongodb.py +13 -13
  174. unstructured_ingest/{v2/processes → processes}/connectors/neo4j.py +7 -7
  175. unstructured_ingest/{v2/processes → processes}/connectors/notion/client.py +6 -6
  176. unstructured_ingest/{v2/processes → processes}/connectors/notion/connector.py +15 -15
  177. unstructured_ingest/{v2/processes → processes}/connectors/notion/helpers.py +4 -4
  178. unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_wrapper.py +5 -1
  179. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  180. unstructured_ingest/{connector → processes/connectors}/notion/types/block.py +4 -4
  181. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/bookmark.py +2 -2
  182. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/breadcrumb.py +1 -1
  183. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/bulleted_list_item.py +2 -2
  184. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/callout.py +2 -2
  185. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/child_database.py +1 -1
  186. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/child_page.py +1 -1
  187. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/code.py +2 -2
  188. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/column_list.py +1 -1
  189. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/divider.py +1 -1
  190. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/embed.py +2 -2
  191. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/equation.py +1 -1
  192. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/file.py +4 -4
  193. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/heading.py +2 -2
  194. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/image.py +2 -2
  195. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/link_preview.py +1 -1
  196. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/link_to_page.py +1 -1
  197. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/numbered_list.py +2 -2
  198. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/paragraph.py +2 -2
  199. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/pdf.py +3 -3
  200. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/quote.py +2 -2
  201. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/synced_block.py +1 -1
  202. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table.py +2 -5
  203. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/table_of_contents.py +1 -1
  204. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/template.py +2 -2
  205. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/todo.py +2 -2
  206. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/toggle.py +2 -2
  207. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/blocks/unsupported.py +1 -1
  208. unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/video.py +2 -2
  209. unstructured_ingest/{connector → processes/connectors}/notion/types/database.py +6 -6
  210. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/__init__.py +1 -1
  211. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/checkbox.py +1 -1
  212. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_by.py +2 -2
  213. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/created_time.py +1 -1
  214. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/date.py +2 -2
  215. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/email.py +1 -1
  216. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/files.py +2 -2
  217. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/formula.py +1 -1
  218. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/last_edited_by.py +2 -2
  219. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/last_edited_time.py +1 -1
  220. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/multiselect.py +1 -1
  221. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/number.py +1 -1
  222. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/people.py +2 -2
  223. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/phone_number.py +1 -1
  224. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/relation.py +1 -1
  225. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rich_text.py +2 -2
  226. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/rollup.py +1 -1
  227. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/select.py +1 -1
  228. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/status.py +1 -1
  229. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/title.py +2 -2
  230. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/database_properties/unique_id.py +1 -1
  231. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/url.py +1 -1
  232. unstructured_ingest/{connector → processes/connectors}/notion/types/database_properties/verification.py +3 -3
  233. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/date.py +1 -1
  234. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/file.py +1 -1
  235. unstructured_ingest/{connector → processes/connectors}/notion/types/page.py +4 -4
  236. unstructured_ingest/{connector → processes/connectors}/notion/types/parent.py +1 -1
  237. unstructured_ingest/{connector → processes/connectors}/notion/types/rich_text.py +3 -3
  238. unstructured_ingest/{v2/processes → processes}/connectors/notion/types/user.py +1 -1
  239. unstructured_ingest/{v2/processes → processes}/connectors/onedrive.py +10 -10
  240. unstructured_ingest/{v2/processes → processes}/connectors/outlook.py +9 -9
  241. unstructured_ingest/{v2/processes → processes}/connectors/pinecone.py +12 -9
  242. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/__init__.py +1 -1
  243. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/cloud.py +2 -2
  244. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/local.py +2 -2
  245. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/qdrant.py +9 -6
  246. unstructured_ingest/{v2/processes → processes}/connectors/qdrant/server.py +2 -2
  247. unstructured_ingest/{v2/processes → processes}/connectors/redisdb.py +6 -6
  248. unstructured_ingest/{v2/processes → processes}/connectors/salesforce.py +9 -9
  249. unstructured_ingest/{v2/processes → processes}/connectors/sharepoint.py +7 -7
  250. unstructured_ingest/{v2/processes → processes}/connectors/slack.py +9 -9
  251. unstructured_ingest/{v2/processes → processes}/connectors/sql/__init__.py +1 -1
  252. unstructured_ingest/{v2/processes → processes}/connectors/sql/databricks_delta_tables.py +11 -9
  253. unstructured_ingest/{v2/processes → processes}/connectors/sql/postgres.py +4 -4
  254. unstructured_ingest/{v2/processes → processes}/connectors/sql/singlestore.py +4 -4
  255. unstructured_ingest/{v2/processes → processes}/connectors/sql/snowflake.py +6 -6
  256. unstructured_ingest/{v2/processes → processes}/connectors/sql/sql.py +16 -11
  257. unstructured_ingest/{v2/processes → processes}/connectors/sql/sqlite.py +4 -4
  258. unstructured_ingest/{v2/processes → processes}/connectors/sql/vastdb.py +9 -10
  259. unstructured_ingest/{v2/processes → processes}/connectors/vectara.py +6 -6
  260. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/__init__.py +1 -1
  261. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/cloud.py +3 -3
  262. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/embedded.py +3 -3
  263. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/local.py +3 -3
  264. unstructured_ingest/{v2/processes → processes}/connectors/weaviate/weaviate.py +5 -5
  265. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/client.py +2 -2
  266. unstructured_ingest/{v2/processes → processes}/connectors/zendesk/zendesk.py +10 -10
  267. unstructured_ingest/{v2/processes → processes}/embedder.py +1 -1
  268. unstructured_ingest/{v2/processes → processes}/filter.py +4 -4
  269. unstructured_ingest/{v2/processes → processes}/partitioner.py +6 -6
  270. unstructured_ingest/{v2/processes → processes}/uncompress.py +3 -3
  271. unstructured_ingest/{v2/processes → processes}/utils/blob_storage.py +2 -2
  272. unstructured_ingest/{v2/unstructured_api.py → unstructured_api.py} +2 -2
  273. unstructured_ingest/utils/compression.py +1 -48
  274. unstructured_ingest/utils/data_prep.py +9 -1
  275. unstructured_ingest/utils/html.py +3 -3
  276. unstructured_ingest/{v2/utils.py → utils/pydantic_models.py} +0 -9
  277. unstructured_ingest/utils/string_and_date_utils.py +1 -1
  278. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/METADATA +99 -99
  279. unstructured_ingest-0.7.0.dist-info/RECORD +370 -0
  280. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/top_level.txt +1 -0
  281. test/unit/v2/test_utils.py +0 -82
  282. unstructured_ingest/cli/cmd_factory.py +0 -12
  283. unstructured_ingest/cli/cmds/__init__.py +0 -145
  284. unstructured_ingest/cli/cmds/airtable.py +0 -69
  285. unstructured_ingest/cli/cmds/astradb.py +0 -99
  286. unstructured_ingest/cli/cmds/azure_ai_search.py +0 -65
  287. unstructured_ingest/cli/cmds/biomed.py +0 -52
  288. unstructured_ingest/cli/cmds/chroma.py +0 -104
  289. unstructured_ingest/cli/cmds/clarifai.py +0 -71
  290. unstructured_ingest/cli/cmds/confluence.py +0 -69
  291. unstructured_ingest/cli/cmds/databricks_volumes.py +0 -163
  292. unstructured_ingest/cli/cmds/delta_table.py +0 -94
  293. unstructured_ingest/cli/cmds/discord.py +0 -47
  294. unstructured_ingest/cli/cmds/elasticsearch.py +0 -133
  295. unstructured_ingest/cli/cmds/fsspec/azure.py +0 -94
  296. unstructured_ingest/cli/cmds/fsspec/box.py +0 -48
  297. unstructured_ingest/cli/cmds/fsspec/dropbox.py +0 -51
  298. unstructured_ingest/cli/cmds/fsspec/fsspec.py +0 -15
  299. unstructured_ingest/cli/cmds/fsspec/gcs.py +0 -71
  300. unstructured_ingest/cli/cmds/fsspec/s3.py +0 -74
  301. unstructured_ingest/cli/cmds/fsspec/sftp.py +0 -58
  302. unstructured_ingest/cli/cmds/github.py +0 -54
  303. unstructured_ingest/cli/cmds/gitlab.py +0 -54
  304. unstructured_ingest/cli/cmds/google_drive.py +0 -49
  305. unstructured_ingest/cli/cmds/hubspot.py +0 -70
  306. unstructured_ingest/cli/cmds/jira.py +0 -71
  307. unstructured_ingest/cli/cmds/kafka.py +0 -102
  308. unstructured_ingest/cli/cmds/local.py +0 -43
  309. unstructured_ingest/cli/cmds/mongodb.py +0 -72
  310. unstructured_ingest/cli/cmds/notion.py +0 -48
  311. unstructured_ingest/cli/cmds/onedrive.py +0 -66
  312. unstructured_ingest/cli/cmds/opensearch.py +0 -117
  313. unstructured_ingest/cli/cmds/outlook.py +0 -67
  314. unstructured_ingest/cli/cmds/pinecone.py +0 -71
  315. unstructured_ingest/cli/cmds/qdrant.py +0 -124
  316. unstructured_ingest/cli/cmds/reddit.py +0 -67
  317. unstructured_ingest/cli/cmds/salesforce.py +0 -58
  318. unstructured_ingest/cli/cmds/sharepoint.py +0 -66
  319. unstructured_ingest/cli/cmds/slack.py +0 -56
  320. unstructured_ingest/cli/cmds/sql.py +0 -66
  321. unstructured_ingest/cli/cmds/vectara.py +0 -66
  322. unstructured_ingest/cli/cmds/weaviate.py +0 -98
  323. unstructured_ingest/cli/cmds/wikipedia.py +0 -40
  324. unstructured_ingest/cli/common.py +0 -7
  325. unstructured_ingest/cli/interfaces.py +0 -663
  326. unstructured_ingest/cli/utils.py +0 -205
  327. unstructured_ingest/connector/airtable.py +0 -309
  328. unstructured_ingest/connector/astradb.py +0 -267
  329. unstructured_ingest/connector/azure_ai_search.py +0 -144
  330. unstructured_ingest/connector/biomed.py +0 -320
  331. unstructured_ingest/connector/chroma.py +0 -158
  332. unstructured_ingest/connector/clarifai.py +0 -122
  333. unstructured_ingest/connector/confluence.py +0 -285
  334. unstructured_ingest/connector/databricks_volumes.py +0 -137
  335. unstructured_ingest/connector/delta_table.py +0 -203
  336. unstructured_ingest/connector/discord.py +0 -180
  337. unstructured_ingest/connector/elasticsearch.py +0 -396
  338. unstructured_ingest/connector/fsspec/azure.py +0 -78
  339. unstructured_ingest/connector/fsspec/box.py +0 -109
  340. unstructured_ingest/connector/fsspec/dropbox.py +0 -160
  341. unstructured_ingest/connector/fsspec/fsspec.py +0 -359
  342. unstructured_ingest/connector/fsspec/gcs.py +0 -82
  343. unstructured_ingest/connector/fsspec/s3.py +0 -62
  344. unstructured_ingest/connector/fsspec/sftp.py +0 -81
  345. unstructured_ingest/connector/git.py +0 -124
  346. unstructured_ingest/connector/github.py +0 -174
  347. unstructured_ingest/connector/gitlab.py +0 -142
  348. unstructured_ingest/connector/google_drive.py +0 -348
  349. unstructured_ingest/connector/hubspot.py +0 -278
  350. unstructured_ingest/connector/jira.py +0 -469
  351. unstructured_ingest/connector/kafka.py +0 -293
  352. unstructured_ingest/connector/local.py +0 -139
  353. unstructured_ingest/connector/mongodb.py +0 -284
  354. unstructured_ingest/connector/notion/client.py +0 -248
  355. unstructured_ingest/connector/notion/connector.py +0 -469
  356. unstructured_ingest/connector/notion/helpers.py +0 -584
  357. unstructured_ingest/connector/notion/types/blocks/bookmark.py +0 -40
  358. unstructured_ingest/connector/notion/types/blocks/callout.py +0 -94
  359. unstructured_ingest/connector/notion/types/blocks/child_database.py +0 -23
  360. unstructured_ingest/connector/notion/types/blocks/code.py +0 -43
  361. unstructured_ingest/connector/notion/types/blocks/column_list.py +0 -35
  362. unstructured_ingest/connector/notion/types/blocks/divider.py +0 -22
  363. unstructured_ingest/connector/notion/types/blocks/heading.py +0 -37
  364. unstructured_ingest/connector/notion/types/blocks/link_preview.py +0 -24
  365. unstructured_ingest/connector/notion/types/blocks/numbered_list.py +0 -29
  366. unstructured_ingest/connector/notion/types/blocks/quote.py +0 -37
  367. unstructured_ingest/connector/notion/types/blocks/synced_block.py +0 -57
  368. unstructured_ingest/connector/notion/types/blocks/table.py +0 -63
  369. unstructured_ingest/connector/notion/types/blocks/table_of_contents.py +0 -23
  370. unstructured_ingest/connector/notion/types/blocks/template.py +0 -30
  371. unstructured_ingest/connector/notion/types/blocks/todo.py +0 -42
  372. unstructured_ingest/connector/notion/types/blocks/unsupported.py +0 -20
  373. unstructured_ingest/connector/notion/types/database_properties/__init__.py +0 -106
  374. unstructured_ingest/connector/notion/types/database_properties/checkbox.py +0 -38
  375. unstructured_ingest/connector/notion/types/database_properties/date.py +0 -41
  376. unstructured_ingest/connector/notion/types/database_properties/formula.py +0 -49
  377. unstructured_ingest/connector/notion/types/database_properties/last_edited_time.py +0 -34
  378. unstructured_ingest/connector/notion/types/database_properties/multiselect.py +0 -73
  379. unstructured_ingest/connector/notion/types/database_properties/people.py +0 -40
  380. unstructured_ingest/connector/notion/types/database_properties/phone_number.py +0 -36
  381. unstructured_ingest/connector/notion/types/database_properties/relation.py +0 -67
  382. unstructured_ingest/connector/notion/types/database_properties/select.py +0 -68
  383. unstructured_ingest/connector/notion/types/database_properties/status.py +0 -80
  384. unstructured_ingest/connector/notion/types/database_properties/unique_id.py +0 -50
  385. unstructured_ingest/connector/notion/types/date.py +0 -26
  386. unstructured_ingest/connector/notion/types/file.py +0 -51
  387. unstructured_ingest/connector/notion/types/user.py +0 -76
  388. unstructured_ingest/connector/onedrive.py +0 -232
  389. unstructured_ingest/connector/opensearch.py +0 -218
  390. unstructured_ingest/connector/outlook.py +0 -285
  391. unstructured_ingest/connector/pinecone.py +0 -150
  392. unstructured_ingest/connector/qdrant.py +0 -144
  393. unstructured_ingest/connector/reddit.py +0 -166
  394. unstructured_ingest/connector/registry.py +0 -109
  395. unstructured_ingest/connector/salesforce.py +0 -301
  396. unstructured_ingest/connector/sharepoint.py +0 -573
  397. unstructured_ingest/connector/slack.py +0 -224
  398. unstructured_ingest/connector/sql.py +0 -199
  399. unstructured_ingest/connector/vectara.py +0 -253
  400. unstructured_ingest/connector/weaviate.py +0 -190
  401. unstructured_ingest/connector/wikipedia.py +0 -208
  402. unstructured_ingest/enhanced_dataclass/__init__.py +0 -4
  403. unstructured_ingest/enhanced_dataclass/core.py +0 -99
  404. unstructured_ingest/enhanced_dataclass/dataclasses.py +0 -54
  405. unstructured_ingest/enhanced_dataclass/json_mixin.py +0 -125
  406. unstructured_ingest/interfaces.py +0 -852
  407. unstructured_ingest/pipeline/copy.py +0 -19
  408. unstructured_ingest/pipeline/doc_factory.py +0 -12
  409. unstructured_ingest/pipeline/partition.py +0 -60
  410. unstructured_ingest/pipeline/permissions.py +0 -12
  411. unstructured_ingest/pipeline/reformat/chunking.py +0 -134
  412. unstructured_ingest/pipeline/reformat/embedding.py +0 -64
  413. unstructured_ingest/pipeline/source.py +0 -77
  414. unstructured_ingest/pipeline/utils.py +0 -6
  415. unstructured_ingest/pipeline/write.py +0 -18
  416. unstructured_ingest/processor.py +0 -93
  417. unstructured_ingest/runner/__init__.py +0 -104
  418. unstructured_ingest/runner/airtable.py +0 -35
  419. unstructured_ingest/runner/astradb.py +0 -34
  420. unstructured_ingest/runner/base_runner.py +0 -89
  421. unstructured_ingest/runner/biomed.py +0 -45
  422. unstructured_ingest/runner/confluence.py +0 -35
  423. unstructured_ingest/runner/delta_table.py +0 -34
  424. unstructured_ingest/runner/discord.py +0 -35
  425. unstructured_ingest/runner/elasticsearch.py +0 -40
  426. unstructured_ingest/runner/fsspec/azure.py +0 -30
  427. unstructured_ingest/runner/fsspec/box.py +0 -28
  428. unstructured_ingest/runner/fsspec/dropbox.py +0 -30
  429. unstructured_ingest/runner/fsspec/fsspec.py +0 -40
  430. unstructured_ingest/runner/fsspec/gcs.py +0 -28
  431. unstructured_ingest/runner/fsspec/s3.py +0 -28
  432. unstructured_ingest/runner/fsspec/sftp.py +0 -28
  433. unstructured_ingest/runner/github.py +0 -37
  434. unstructured_ingest/runner/gitlab.py +0 -37
  435. unstructured_ingest/runner/google_drive.py +0 -35
  436. unstructured_ingest/runner/hubspot.py +0 -35
  437. unstructured_ingest/runner/jira.py +0 -35
  438. unstructured_ingest/runner/kafka.py +0 -34
  439. unstructured_ingest/runner/local.py +0 -23
  440. unstructured_ingest/runner/mongodb.py +0 -34
  441. unstructured_ingest/runner/notion.py +0 -61
  442. unstructured_ingest/runner/onedrive.py +0 -35
  443. unstructured_ingest/runner/opensearch.py +0 -40
  444. unstructured_ingest/runner/outlook.py +0 -33
  445. unstructured_ingest/runner/reddit.py +0 -35
  446. unstructured_ingest/runner/salesforce.py +0 -33
  447. unstructured_ingest/runner/sharepoint.py +0 -35
  448. unstructured_ingest/runner/slack.py +0 -33
  449. unstructured_ingest/runner/utils.py +0 -47
  450. unstructured_ingest/runner/wikipedia.py +0 -35
  451. unstructured_ingest/runner/writers/__init__.py +0 -48
  452. unstructured_ingest/runner/writers/astradb.py +0 -22
  453. unstructured_ingest/runner/writers/azure_ai_search.py +0 -24
  454. unstructured_ingest/runner/writers/base_writer.py +0 -26
  455. unstructured_ingest/runner/writers/chroma.py +0 -22
  456. unstructured_ingest/runner/writers/clarifai.py +0 -19
  457. unstructured_ingest/runner/writers/databricks_volumes.py +0 -25
  458. unstructured_ingest/runner/writers/delta_table.py +0 -24
  459. unstructured_ingest/runner/writers/elasticsearch.py +0 -24
  460. unstructured_ingest/runner/writers/fsspec/azure.py +0 -24
  461. unstructured_ingest/runner/writers/fsspec/box.py +0 -21
  462. unstructured_ingest/runner/writers/fsspec/dropbox.py +0 -21
  463. unstructured_ingest/runner/writers/fsspec/gcs.py +0 -19
  464. unstructured_ingest/runner/writers/fsspec/s3.py +0 -21
  465. unstructured_ingest/runner/writers/kafka.py +0 -21
  466. unstructured_ingest/runner/writers/mongodb.py +0 -21
  467. unstructured_ingest/runner/writers/opensearch.py +0 -26
  468. unstructured_ingest/runner/writers/pinecone.py +0 -21
  469. unstructured_ingest/runner/writers/qdrant.py +0 -19
  470. unstructured_ingest/runner/writers/sql.py +0 -22
  471. unstructured_ingest/runner/writers/vectara.py +0 -22
  472. unstructured_ingest/runner/writers/weaviate.py +0 -21
  473. unstructured_ingest/utils/google_filetype.py +0 -9
  474. unstructured_ingest/v2/__init__.py +0 -1
  475. unstructured_ingest/v2/cli/__init__.py +0 -0
  476. unstructured_ingest/v2/cli/base/__init__.py +0 -4
  477. unstructured_ingest/v2/cli/base/cmd.py +0 -269
  478. unstructured_ingest/v2/cli/base/dest.py +0 -85
  479. unstructured_ingest/v2/cli/base/src.py +0 -85
  480. unstructured_ingest/v2/cli/cli.py +0 -24
  481. unstructured_ingest/v2/cli/utils/__init__.py +0 -0
  482. unstructured_ingest/v2/logger.py +0 -126
  483. unstructured_ingest/v2/main.py +0 -11
  484. unstructured_ingest/v2/pipeline/__init__.py +0 -0
  485. unstructured_ingest/v2/pipeline/interfaces.py +0 -211
  486. unstructured_ingest/v2/pipeline/pipeline.py +0 -408
  487. unstructured_ingest/v2/pipeline/steps/__init__.py +0 -0
  488. unstructured_ingest/v2/processes/connectors/assets/__init__.py +0 -0
  489. unstructured_ingest/v2/processes/connectors/assets/databricks_delta_table_schema.sql +0 -10
  490. unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +0 -23
  491. unstructured_ingest/v2/processes/connectors/notion/__init__.py +0 -0
  492. unstructured_ingest/v2/processes/connectors/notion/interfaces.py +0 -32
  493. unstructured_ingest/v2/processes/connectors/notion/types/__init__.py +0 -0
  494. unstructured_ingest/v2/processes/connectors/notion/types/block.py +0 -96
  495. unstructured_ingest/v2/processes/connectors/notion/types/blocks/__init__.py +0 -63
  496. unstructured_ingest/v2/processes/connectors/notion/types/blocks/breadcrumb.py +0 -21
  497. unstructured_ingest/v2/processes/connectors/notion/types/blocks/bulleted_list_item.py +0 -31
  498. unstructured_ingest/v2/processes/connectors/notion/types/blocks/child_page.py +0 -23
  499. unstructured_ingest/v2/processes/connectors/notion/types/blocks/embed.py +0 -36
  500. unstructured_ingest/v2/processes/connectors/notion/types/blocks/equation.py +0 -23
  501. unstructured_ingest/v2/processes/connectors/notion/types/blocks/file.py +0 -49
  502. unstructured_ingest/v2/processes/connectors/notion/types/blocks/image.py +0 -21
  503. unstructured_ingest/v2/processes/connectors/notion/types/blocks/link_to_page.py +0 -29
  504. unstructured_ingest/v2/processes/connectors/notion/types/blocks/paragraph.py +0 -31
  505. unstructured_ingest/v2/processes/connectors/notion/types/blocks/pdf.py +0 -49
  506. unstructured_ingest/v2/processes/connectors/notion/types/blocks/toggle.py +0 -37
  507. unstructured_ingest/v2/processes/connectors/notion/types/blocks/video.py +0 -22
  508. unstructured_ingest/v2/processes/connectors/notion/types/database.py +0 -73
  509. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_by.py +0 -35
  510. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/created_time.py +0 -34
  511. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/email.py +0 -36
  512. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/files.py +0 -37
  513. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/last_edited_by.py +0 -34
  514. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/number.py +0 -49
  515. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rich_text.py +0 -43
  516. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/rollup.py +0 -56
  517. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/title.py +0 -37
  518. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/url.py +0 -37
  519. unstructured_ingest/v2/processes/connectors/notion/types/database_properties/verification.py +0 -78
  520. unstructured_ingest/v2/processes/connectors/notion/types/page.py +0 -45
  521. unstructured_ingest/v2/processes/connectors/notion/types/parent.py +0 -66
  522. unstructured_ingest/v2/processes/connectors/notion/types/rich_text.py +0 -189
  523. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +0 -0
  524. unstructured_ingest/v2/processes/utils/__init__.py +0 -0
  525. unstructured_ingest/v2/types/__init__.py +0 -0
  526. unstructured_ingest-0.6.2.dist-info/RECORD +0 -589
  527. {test/unit/v2 → examples}/__init__.py +0 -0
  528. /test/unit/{v2/chunkers → chunkers}/__init__.py +0 -0
  529. /test/unit/{v2/connectors → connectors}/__init__.py +0 -0
  530. /test/unit/{v2/connectors → connectors}/ibm_watsonx/__init__.py +0 -0
  531. /test/unit/{v2/connectors → connectors}/motherduck/__init__.py +0 -0
  532. /test/unit/{v2/connectors → connectors}/sql/__init__.py +0 -0
  533. /test/unit/{v2/embedders → embedders}/__init__.py +0 -0
  534. /test/unit/{v2/embedders → embedders}/test_bedrock.py +0 -0
  535. /test/unit/{v2/embedders → embedders}/test_mixedbread.py +0 -0
  536. /test/unit/{v2/embedders → embedders}/test_octoai.py +0 -0
  537. /test/unit/{v2/embedders → embedders}/test_openai.py +0 -0
  538. /test/unit/{v2/embedders → embedders}/test_togetherai.py +0 -0
  539. /test/unit/{v2/embedders → embedders}/test_voyageai.py +0 -0
  540. /test/unit/{v2/partitioners → partitioners}/__init__.py +0 -0
  541. /test/unit/{v2/utils → utils}/__init__.py +0 -0
  542. /test/unit/{v2/utils → utils}/data_generator.py +0 -0
  543. /unstructured_ingest/{v2/cli → cli}/base/importer.py +0 -0
  544. /unstructured_ingest/cli/{cmds/fsspec → utils}/__init__.py +0 -0
  545. /unstructured_ingest/{v2/cli → cli}/utils/click.py +0 -0
  546. /unstructured_ingest/{connector → data_types}/__init__.py +0 -0
  547. /unstructured_ingest/{v2/errors.py → errors_v2.py} +0 -0
  548. /unstructured_ingest/{v2/interfaces → interfaces}/__init__.py +0 -0
  549. /unstructured_ingest/{v2/interfaces → interfaces}/connector.py +0 -0
  550. /unstructured_ingest/{v2/interfaces → interfaces}/process.py +0 -0
  551. /unstructured_ingest/{v2/interfaces → interfaces}/processor.py +0 -0
  552. /unstructured_ingest/{connector/fsspec → pipeline/steps}/__init__.py +0 -0
  553. /unstructured_ingest/{v2/processes → processes}/__init__.py +0 -0
  554. /unstructured_ingest/{connector/notion → processes/connectors/assets}/__init__.py +0 -0
  555. /unstructured_ingest/{v2/processes → processes}/connectors/fsspec/utils.py +0 -0
  556. /unstructured_ingest/{connector/notion/types → processes/connectors/notion}/__init__.py +0 -0
  557. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/__init__.py +0 -0
  558. /unstructured_ingest/{ingest_backoff → processes/connectors/notion/ingest_backoff}/_common.py +0 -0
  559. /unstructured_ingest/{connector → processes/connectors}/notion/interfaces.py +0 -0
  560. /unstructured_ingest/{pipeline/reformat → processes/connectors/notion/types}/__init__.py +0 -0
  561. /unstructured_ingest/{connector → processes/connectors}/notion/types/blocks/__init__.py +0 -0
  562. /unstructured_ingest/{v2/processes → processes}/connectors/utils.py +0 -0
  563. /unstructured_ingest/{runner/fsspec → processes/connectors/zendesk}/__init__.py +0 -0
  564. /unstructured_ingest/{runner/writers/fsspec → processes/utils}/__init__.py +0 -0
  565. /unstructured_ingest/{v2 → utils}/constants.py +0 -0
  566. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/LICENSE.md +0 -0
  567. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/WHEEL +0 -0
  568. {unstructured_ingest-0.6.2.dist-info → unstructured_ingest-0.7.0.dist-info}/entry_points.txt +0 -0
@@ -1,106 +0,0 @@
1
- from typing import Dict
2
-
3
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
4
-
5
- from .checkbox import Checkbox, CheckboxCell
6
- from .created_by import CreatedBy, CreatedByCell
7
- from .created_time import CreatedTime, CreatedTimeCell
8
- from .date import Date, DateCell
9
- from .email import Email, EmailCell
10
- from .files import Files, FilesCell
11
- from .formula import Formula, FormulaCell
12
- from .last_edited_by import LastEditedBy, LastEditedByCell
13
- from .last_edited_time import LastEditedTime, LastEditedTimeCell
14
- from .multiselect import MultiSelect, MultiSelectCell
15
- from .number import Number, NumberCell
16
- from .people import People, PeopleCell
17
- from .phone_number import PhoneNumber, PhoneNumberCell
18
- from .relation import Relation, RelationCell
19
- from .rich_text import RichText, RichTextCell
20
- from .rollup import Rollup, RollupCell
21
- from .select import Select, SelectCell
22
- from .status import Status, StatusCell
23
- from .title import Title, TitleCell
24
- from .unique_id import UniqueID, UniqueIDCell
25
- from .url import URL, URLCell
26
- from .verification import Verification, VerificationCell
27
-
28
- db_prop_type_mapping = {
29
- "checkbox": Checkbox,
30
- "created_by": CreatedBy,
31
- "created_time": CreatedTime,
32
- "date": Date,
33
- "email": Email,
34
- "files": Files,
35
- "formula": Formula,
36
- "last_edited_by": LastEditedBy,
37
- "last_edited_time": LastEditedTime,
38
- "multi_select": MultiSelect,
39
- "number": Number,
40
- "people": People,
41
- "phone_number": PhoneNumber,
42
- "relation": Relation,
43
- "rich_text": RichText,
44
- "rollup": Rollup,
45
- "select": Select,
46
- "status": Status,
47
- "title": Title,
48
- "unique_id": UniqueID,
49
- "url": URL,
50
- "verification": Verification,
51
- }
52
-
53
-
54
- def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
55
- mapped_dict = {}
56
- for k, v in props.items():
57
- try:
58
- mapped_dict[k] = db_prop_type_mapping[v["type"]].from_dict(v) # type: ignore
59
- except KeyError as ke:
60
- raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
61
-
62
- return mapped_dict
63
-
64
-
65
- db_cell_type_mapping = {
66
- "checkbox": CheckboxCell,
67
- "created_by": CreatedByCell,
68
- "created_time": CreatedTimeCell,
69
- "date": DateCell,
70
- "email": EmailCell,
71
- "files": FilesCell,
72
- "formula": FormulaCell,
73
- "last_edited_by": LastEditedByCell,
74
- "last_edited_time": LastEditedTimeCell,
75
- "multi_select": MultiSelectCell,
76
- "number": NumberCell,
77
- "people": PeopleCell,
78
- "phone_number": PhoneNumberCell,
79
- "relation": RelationCell,
80
- "rich_text": RichTextCell,
81
- "rollup": RollupCell,
82
- "select": SelectCell,
83
- "status": StatusCell,
84
- "title": TitleCell,
85
- "unique_id": UniqueIDCell,
86
- "url": URLCell,
87
- "verification": VerificationCell,
88
- }
89
-
90
-
91
- def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
92
- mapped_dict = {}
93
- for k, v in props.items():
94
- try:
95
- t = v["type"]
96
- mapped_dict[k] = db_cell_type_mapping[t].from_dict(v) # type: ignore
97
- except KeyError as ke:
98
- raise KeyError(f"failed to map to associated database property -> {k}: {v}") from ke
99
-
100
- return mapped_dict
101
-
102
-
103
- __all__ = [
104
- "map_properties",
105
- "map_cells",
106
- ]
@@ -1,38 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#checkbox
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.attributes import Checked, Type
6
- from htmlBuilder.tags import Div, HtmlTag, Input
7
-
8
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
9
-
10
-
11
- @dataclass
12
- class Checkbox(DBPropertyBase):
13
- id: str
14
- name: str
15
- type: str = "checkbox"
16
- checkbox: dict = field(default_factory=dict)
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class CheckboxCell(DBCellBase):
25
- id: str
26
- checkbox: bool
27
- name: Optional[str] = None
28
- type: str = "checkbox"
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(**data)
33
-
34
- def get_html(self) -> Optional[HtmlTag]:
35
- check_input_attributes = [Type("checkbox")]
36
- if self.checkbox:
37
- check_input_attributes.append(Checked(""))
38
- return Div([], Input(check_input_attributes))
@@ -1,41 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#date
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
- from unstructured_ingest.connector.notion.types.date import Date as DateType
9
-
10
-
11
- @dataclass
12
- class Date(DBPropertyBase):
13
- id: str
14
- name: str
15
- type: str = "date"
16
- date: dict = field(default_factory=dict)
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class DateCell(DBCellBase):
25
- id: str
26
- date: Optional[DateType] = None
27
- name: Optional[str] = None
28
- type: str = "date"
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- date = None
33
- date_data = data.pop("date")
34
- if date_data:
35
- date = DateType.from_dict(date_data)
36
- return cls(date=date, **data)
37
-
38
- def get_html(self) -> Optional[HtmlTag]:
39
- if date := self.date:
40
- return date.get_html()
41
- return None
@@ -1,49 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#formula
2
- from dataclasses import dataclass
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import (
8
- DBCellBase,
9
- DBPropertyBase,
10
- FromJSONMixin,
11
- )
12
-
13
-
14
- @dataclass
15
- class FormulaProp(FromJSONMixin):
16
- expression: str
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class Formula(DBPropertyBase):
25
- id: str
26
- name: str
27
- formula: FormulaProp
28
- type: str = "formula"
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(formula=FormulaProp.from_dict(data.pop("formula", {})), **data)
33
-
34
-
35
- @dataclass
36
- class FormulaCell(DBCellBase):
37
- id: str
38
- formula: dict
39
- type: str = "formula"
40
- name: Optional[str] = None
41
-
42
- @classmethod
43
- def from_dict(cls, data: dict):
44
- return cls(**data)
45
-
46
- def get_html(self) -> Optional[HtmlTag]:
47
- formula = self.formula
48
- t = formula.get("type")
49
- return Div([], str(formula[t]))
@@ -1,34 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#last-edited-time
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
-
9
-
10
- @dataclass
11
- class LastEditedTime(DBPropertyBase):
12
- id: str
13
- name: str
14
- type: str = "last_edited_time"
15
- last_edited_time: dict = field(default_factory=dict)
16
-
17
- @classmethod
18
- def from_dict(cls, data: dict):
19
- return cls(**data)
20
-
21
-
22
- @dataclass
23
- class LastEditedTimeCell(DBCellBase):
24
- id: str
25
- last_edited_time: str
26
- type: str = "last_edited_time"
27
- name: Optional[str] = None
28
-
29
- @classmethod
30
- def from_dict(cls, data: dict):
31
- return cls(**data)
32
-
33
- def get_html(self) -> Optional[HtmlTag]:
34
- return Div([], self.last_edited_time)
@@ -1,73 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#multi-select
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Style
6
- from htmlBuilder.tags import Div, HtmlTag, Span
7
-
8
- from unstructured_ingest.connector.notion.interfaces import (
9
- DBCellBase,
10
- DBPropertyBase,
11
- FromJSONMixin,
12
- )
13
-
14
-
15
- @dataclass
16
- class MultiSelectOption(FromJSONMixin):
17
- color: str
18
- id: str
19
- name: str
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- return cls(**data)
24
-
25
-
26
- @dataclass
27
- class MultiSelectProp(FromJSONMixin):
28
- options: List[MultiSelectOption] = field(default_factory=list)
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(options=[MultiSelectOption.from_dict(o) for o in data.get("options", [])])
33
-
34
-
35
- @dataclass
36
- class MultiSelect(DBPropertyBase):
37
- id: str
38
- name: str
39
- multi_select: MultiSelectProp
40
- type: str = "multi_select"
41
-
42
- @classmethod
43
- def from_dict(cls, data: dict):
44
- return cls(
45
- multi_select=data.pop("multi_select", {}),
46
- **data,
47
- )
48
-
49
-
50
- @dataclass
51
- class MultiSelectCell(DBCellBase):
52
- id: str
53
- multi_select: List[MultiSelectOption]
54
- type: str = "multi_select"
55
- name: Optional[str] = None
56
-
57
- @classmethod
58
- def from_dict(cls, data: dict):
59
- return cls(
60
- multi_select=[MultiSelectOption.from_dict(o) for o in data.pop("multi_select", [])],
61
- **data,
62
- )
63
-
64
- def get_html(self) -> Optional[HtmlTag]:
65
- if not self.multi_select:
66
- return None
67
- option_spans = []
68
- for option in self.multi_select:
69
- option_attributes = []
70
- if option.color and option.color != "default":
71
- option_attributes.append(Style(f"color: {option.color}"))
72
- option_spans.append(Span(option_attributes, option.name))
73
- return Div([], option_spans)
@@ -1,40 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#people
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag, Span
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
- from unstructured_ingest.connector.notion.types.user import People as PeopleType
9
-
10
-
11
- @dataclass
12
- class People(DBPropertyBase):
13
- id: str
14
- name: str
15
- type: str = "people"
16
- people: dict = field(default_factory=dict)
17
-
18
- @classmethod
19
- def from_dict(cls, data: dict):
20
- return cls(**data)
21
-
22
-
23
- @dataclass
24
- class PeopleCell(DBCellBase):
25
- id: str
26
- people: List[PeopleType]
27
- type: str = "people"
28
- name: Optional[str] = None
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(people=[PeopleType.from_dict(p) for p in data.pop("people", {})], **data)
33
-
34
- def get_html(self) -> Optional[HtmlTag]:
35
- if not self.people:
36
- return None
37
- people_spans = []
38
- for person in self.people:
39
- people_spans.append(Span([], person.get_html()))
40
- return Div([], people_spans)
@@ -1,36 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#phone-number
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import DBCellBase, DBPropertyBase
8
-
9
-
10
- @dataclass
11
- class PhoneNumber(DBPropertyBase):
12
- id: str
13
- name: str
14
- type: str = "phone_number"
15
- phone_number: dict = field(default_factory=dict)
16
-
17
- @classmethod
18
- def from_dict(cls, data: dict):
19
- return cls(**data)
20
-
21
-
22
- @dataclass
23
- class PhoneNumberCell(DBCellBase):
24
- id: str
25
- phone_number: Optional[str]
26
- name: Optional[str] = None
27
- type: str = "phone_number"
28
-
29
- @classmethod
30
- def from_dict(cls, data: dict):
31
- return cls(**data)
32
-
33
- def get_html(self) -> Optional[HtmlTag]:
34
- if phone_number := self.phone_number:
35
- return Div([], phone_number)
36
- return None
@@ -1,67 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#relation
2
- from dataclasses import dataclass
3
- from typing import Optional
4
- from urllib.parse import unquote
5
-
6
- from htmlBuilder.tags import Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import (
9
- DBCellBase,
10
- DBPropertyBase,
11
- FromJSONMixin,
12
- )
13
-
14
-
15
- @dataclass
16
- class DualProperty(FromJSONMixin):
17
- synced_property_id: str
18
- synced_property_name: str
19
-
20
- @classmethod
21
- def from_dict(cls, data: dict):
22
- return cls(**data)
23
-
24
-
25
- @dataclass
26
- class RelationProp(FromJSONMixin):
27
- database_id: str
28
- type: str
29
- dual_property: DualProperty
30
-
31
- @classmethod
32
- def from_dict(cls, data: dict):
33
- t = data.get("type")
34
- if t == "dual_property":
35
- dual_property = DualProperty.from_dict(data.pop(t))
36
- else:
37
- raise ValueError(f"{t} type not recognized")
38
-
39
- return cls(dual_property=dual_property, **data)
40
-
41
-
42
- @dataclass
43
- class Relation(DBPropertyBase):
44
- id: str
45
- name: str
46
- relation: RelationProp
47
- type: str = "relation"
48
-
49
- @classmethod
50
- def from_dict(cls, data: dict):
51
- return cls(relation=RelationProp.from_dict(data.pop("relation")), **data)
52
-
53
-
54
- @dataclass
55
- class RelationCell(DBCellBase):
56
- id: str
57
- has_more: bool
58
- relation: list
59
- type: str = "relation"
60
- name: Optional[str] = None
61
-
62
- @classmethod
63
- def from_dict(cls, data: dict):
64
- return cls(**data)
65
-
66
- def get_html(self) -> Optional[HtmlTag]:
67
- return Div([], unquote(self.id))
@@ -1,68 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#select
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Style
6
- from htmlBuilder.tags import Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import (
9
- DBCellBase,
10
- DBPropertyBase,
11
- FromJSONMixin,
12
- )
13
-
14
-
15
- @dataclass
16
- class SelectOption(FromJSONMixin):
17
- color: str
18
- id: str
19
- name: str
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- return cls(**data)
24
-
25
-
26
- @dataclass
27
- class SelectProp(FromJSONMixin):
28
- options: List[SelectOption] = field(default_factory=list)
29
-
30
- @classmethod
31
- def from_dict(cls, data: dict):
32
- return cls(options=[SelectOption.from_dict(o) for o in data.get("options", [])])
33
-
34
-
35
- @dataclass
36
- class Select(DBPropertyBase):
37
- id: str
38
- name: str
39
- select: SelectProp
40
- type: str = "select"
41
-
42
- @classmethod
43
- def from_dict(cls, data: dict):
44
- return cls(select=SelectProp.from_dict(data.pop("select", {})), **data)
45
-
46
-
47
- @dataclass
48
- class SelectCell(DBCellBase):
49
- id: str
50
- select: Optional[SelectOption]
51
- type: str = "select"
52
- name: Optional[str] = None
53
-
54
- @classmethod
55
- def from_dict(cls, data: dict):
56
- select_data = data.pop("select")
57
- select = None
58
- if select_data:
59
- select = SelectOption.from_dict(select_data)
60
- return cls(select=select, **data)
61
-
62
- def get_html(self) -> Optional[HtmlTag]:
63
- if select := self.select:
64
- select_attr = []
65
- if select.color and select.color != "default":
66
- select_attr.append(Style(f"color: {select.color}"))
67
- return Div(select_attr, select.name)
68
- return None
@@ -1,80 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#status
2
- from dataclasses import dataclass, field
3
- from typing import List, Optional
4
-
5
- from htmlBuilder.attributes import Style
6
- from htmlBuilder.tags import Div, HtmlTag
7
-
8
- from unstructured_ingest.connector.notion.interfaces import (
9
- DBCellBase,
10
- DBPropertyBase,
11
- FromJSONMixin,
12
- )
13
-
14
-
15
- @dataclass
16
- class StatusOption(FromJSONMixin):
17
- color: str
18
- id: str
19
- name: str
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- return cls(**data)
24
-
25
-
26
- @dataclass
27
- class StatusGroup(FromJSONMixin):
28
- color: str
29
- id: str
30
- name: str
31
- option_ids: List[str] = field(default_factory=List[str])
32
-
33
- @classmethod
34
- def from_dict(cls, data: dict):
35
- return cls(**data)
36
-
37
-
38
- @dataclass
39
- class StatusProp(FromJSONMixin):
40
- options: List[StatusOption] = field(default_factory=list)
41
- groups: List[StatusGroup] = field(default_factory=list)
42
-
43
- @classmethod
44
- def from_dict(cls, data: dict):
45
- return cls(
46
- options=[StatusOption.from_dict(o) for o in data.get("options", [])],
47
- groups=[StatusGroup.from_dict(g) for g in data.get("groups", [])],
48
- )
49
-
50
-
51
- @dataclass
52
- class Status(DBPropertyBase):
53
- id: str
54
- name: str
55
- status: StatusProp
56
- type: str = "status"
57
-
58
- @classmethod
59
- def from_dict(cls, data: dict):
60
- return cls(status=StatusProp.from_dict(data.pop("status", {})), **data)
61
-
62
-
63
- @dataclass
64
- class StatusCell(DBCellBase):
65
- id: str
66
- status: Optional[StatusOption]
67
- type: str = "status"
68
- name: Optional[str] = None
69
-
70
- @classmethod
71
- def from_dict(cls, data: dict):
72
- return cls(status=StatusOption.from_dict(data.pop("status", {})), **data)
73
-
74
- def get_html(self) -> Optional[HtmlTag]:
75
- if status := self.status:
76
- select_attr = []
77
- if status.color and status.color != "default":
78
- select_attr.append(Style(f"color: {status.color}"))
79
- return Div(select_attr, status.name)
80
- return None
@@ -1,50 +0,0 @@
1
- # https://developers.notion.com/reference/property-object#title
2
- from dataclasses import dataclass, field
3
- from typing import Optional
4
-
5
- from htmlBuilder.tags import Div, HtmlTag
6
-
7
- from unstructured_ingest.connector.notion.interfaces import (
8
- DBCellBase,
9
- DBPropertyBase,
10
- FromJSONMixin,
11
- )
12
-
13
-
14
- @dataclass
15
- class UniqueID(DBPropertyBase):
16
- id: str
17
- name: str
18
- type: str = "unique_id"
19
- unique_id: dict = field(default_factory=dict)
20
-
21
- @classmethod
22
- def from_dict(cls, data: dict):
23
- return cls(**data)
24
-
25
-
26
- @dataclass
27
- class UniqueIDCellData(FromJSONMixin):
28
- prefix: str
29
- number: int
30
-
31
- @classmethod
32
- def from_dict(cls, data: dict):
33
- return cls(**data)
34
-
35
-
36
- @dataclass
37
- class UniqueIDCell(DBCellBase):
38
- id: str
39
- unique_id: Optional[UniqueIDCellData]
40
- type: str = "title"
41
- name: Optional[str] = None
42
-
43
- @classmethod
44
- def from_dict(cls, data: dict):
45
- return cls(unique_id=UniqueIDCellData.from_dict(data.pop("unique_id")), **data)
46
-
47
- def get_html(self) -> Optional[HtmlTag]:
48
- if unique_id := self.unique_id:
49
- return Div([], f"{unique_id.prefix}-{unique_id.number}")
50
- return None