unstructured-ingest 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (243) hide show
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/README.md +28 -0
  4. unstructured_ingest/cli/__init__.py +0 -0
  5. unstructured_ingest/cli/base/__init__.py +4 -0
  6. unstructured_ingest/cli/base/cmd.py +269 -0
  7. unstructured_ingest/cli/base/dest.py +84 -0
  8. unstructured_ingest/cli/base/importer.py +34 -0
  9. unstructured_ingest/cli/base/src.py +75 -0
  10. unstructured_ingest/cli/cli.py +24 -0
  11. unstructured_ingest/cli/cmds.py +14 -0
  12. unstructured_ingest/cli/utils/__init__.py +0 -0
  13. unstructured_ingest/cli/utils/click.py +237 -0
  14. unstructured_ingest/cli/utils/model_conversion.py +222 -0
  15. unstructured_ingest/data_types/__init__.py +0 -0
  16. unstructured_ingest/data_types/entities.py +17 -0
  17. unstructured_ingest/data_types/file_data.py +116 -0
  18. unstructured_ingest/embed/__init__.py +0 -0
  19. unstructured_ingest/embed/azure_openai.py +63 -0
  20. unstructured_ingest/embed/bedrock.py +323 -0
  21. unstructured_ingest/embed/huggingface.py +69 -0
  22. unstructured_ingest/embed/interfaces.py +146 -0
  23. unstructured_ingest/embed/mixedbreadai.py +134 -0
  24. unstructured_ingest/embed/octoai.py +133 -0
  25. unstructured_ingest/embed/openai.py +142 -0
  26. unstructured_ingest/embed/togetherai.py +116 -0
  27. unstructured_ingest/embed/vertexai.py +109 -0
  28. unstructured_ingest/embed/voyageai.py +130 -0
  29. unstructured_ingest/error.py +156 -0
  30. unstructured_ingest/errors_v2.py +156 -0
  31. unstructured_ingest/interfaces/__init__.py +27 -0
  32. unstructured_ingest/interfaces/connector.py +56 -0
  33. unstructured_ingest/interfaces/downloader.py +90 -0
  34. unstructured_ingest/interfaces/indexer.py +29 -0
  35. unstructured_ingest/interfaces/process.py +22 -0
  36. unstructured_ingest/interfaces/processor.py +88 -0
  37. unstructured_ingest/interfaces/upload_stager.py +89 -0
  38. unstructured_ingest/interfaces/uploader.py +67 -0
  39. unstructured_ingest/logger.py +39 -0
  40. unstructured_ingest/main.py +11 -0
  41. unstructured_ingest/otel.py +128 -0
  42. unstructured_ingest/pipeline/__init__.py +0 -0
  43. unstructured_ingest/pipeline/interfaces.py +211 -0
  44. unstructured_ingest/pipeline/otel.py +32 -0
  45. unstructured_ingest/pipeline/pipeline.py +408 -0
  46. unstructured_ingest/pipeline/steps/__init__.py +0 -0
  47. unstructured_ingest/pipeline/steps/chunk.py +78 -0
  48. unstructured_ingest/pipeline/steps/download.py +206 -0
  49. unstructured_ingest/pipeline/steps/embed.py +77 -0
  50. unstructured_ingest/pipeline/steps/filter.py +35 -0
  51. unstructured_ingest/pipeline/steps/index.py +86 -0
  52. unstructured_ingest/pipeline/steps/partition.py +77 -0
  53. unstructured_ingest/pipeline/steps/stage.py +65 -0
  54. unstructured_ingest/pipeline/steps/uncompress.py +50 -0
  55. unstructured_ingest/pipeline/steps/upload.py +58 -0
  56. unstructured_ingest/processes/__init__.py +18 -0
  57. unstructured_ingest/processes/chunker.py +131 -0
  58. unstructured_ingest/processes/connector_registry.py +69 -0
  59. unstructured_ingest/processes/connectors/__init__.py +129 -0
  60. unstructured_ingest/processes/connectors/airtable.py +238 -0
  61. unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  62. unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
  63. unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
  64. unstructured_ingest/processes/connectors/astradb.py +592 -0
  65. unstructured_ingest/processes/connectors/azure_ai_search.py +275 -0
  66. unstructured_ingest/processes/connectors/chroma.py +193 -0
  67. unstructured_ingest/processes/connectors/confluence.py +527 -0
  68. unstructured_ingest/processes/connectors/couchbase.py +336 -0
  69. unstructured_ingest/processes/connectors/databricks/__init__.py +58 -0
  70. unstructured_ingest/processes/connectors/databricks/volumes.py +233 -0
  71. unstructured_ingest/processes/connectors/databricks/volumes_aws.py +93 -0
  72. unstructured_ingest/processes/connectors/databricks/volumes_azure.py +108 -0
  73. unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +91 -0
  74. unstructured_ingest/processes/connectors/databricks/volumes_native.py +92 -0
  75. unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
  76. unstructured_ingest/processes/connectors/delta_table.py +310 -0
  77. unstructured_ingest/processes/connectors/discord.py +161 -0
  78. unstructured_ingest/processes/connectors/duckdb/__init__.py +15 -0
  79. unstructured_ingest/processes/connectors/duckdb/base.py +103 -0
  80. unstructured_ingest/processes/connectors/duckdb/duckdb.py +130 -0
  81. unstructured_ingest/processes/connectors/duckdb/motherduck.py +130 -0
  82. unstructured_ingest/processes/connectors/elasticsearch/__init__.py +19 -0
  83. unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +478 -0
  84. unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
  85. unstructured_ingest/processes/connectors/fsspec/__init__.py +37 -0
  86. unstructured_ingest/processes/connectors/fsspec/azure.py +203 -0
  87. unstructured_ingest/processes/connectors/fsspec/box.py +176 -0
  88. unstructured_ingest/processes/connectors/fsspec/dropbox.py +238 -0
  89. unstructured_ingest/processes/connectors/fsspec/fsspec.py +475 -0
  90. unstructured_ingest/processes/connectors/fsspec/gcs.py +203 -0
  91. unstructured_ingest/processes/connectors/fsspec/s3.py +253 -0
  92. unstructured_ingest/processes/connectors/fsspec/sftp.py +177 -0
  93. unstructured_ingest/processes/connectors/fsspec/utils.py +17 -0
  94. unstructured_ingest/processes/connectors/github.py +226 -0
  95. unstructured_ingest/processes/connectors/gitlab.py +270 -0
  96. unstructured_ingest/processes/connectors/google_drive.py +848 -0
  97. unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +10 -0
  98. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +367 -0
  99. unstructured_ingest/processes/connectors/jira.py +522 -0
  100. unstructured_ingest/processes/connectors/kafka/__init__.py +17 -0
  101. unstructured_ingest/processes/connectors/kafka/cloud.py +121 -0
  102. unstructured_ingest/processes/connectors/kafka/kafka.py +275 -0
  103. unstructured_ingest/processes/connectors/kafka/local.py +103 -0
  104. unstructured_ingest/processes/connectors/kdbai.py +156 -0
  105. unstructured_ingest/processes/connectors/lancedb/__init__.py +30 -0
  106. unstructured_ingest/processes/connectors/lancedb/aws.py +43 -0
  107. unstructured_ingest/processes/connectors/lancedb/azure.py +43 -0
  108. unstructured_ingest/processes/connectors/lancedb/cloud.py +42 -0
  109. unstructured_ingest/processes/connectors/lancedb/gcp.py +44 -0
  110. unstructured_ingest/processes/connectors/lancedb/lancedb.py +181 -0
  111. unstructured_ingest/processes/connectors/lancedb/local.py +44 -0
  112. unstructured_ingest/processes/connectors/local.py +227 -0
  113. unstructured_ingest/processes/connectors/milvus.py +311 -0
  114. unstructured_ingest/processes/connectors/mongodb.py +389 -0
  115. unstructured_ingest/processes/connectors/neo4j.py +534 -0
  116. unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  117. unstructured_ingest/processes/connectors/notion/client.py +349 -0
  118. unstructured_ingest/processes/connectors/notion/connector.py +350 -0
  119. unstructured_ingest/processes/connectors/notion/helpers.py +448 -0
  120. unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +3 -0
  121. unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +102 -0
  122. unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +126 -0
  123. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  124. unstructured_ingest/processes/connectors/notion/interfaces.py +32 -0
  125. unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  126. unstructured_ingest/processes/connectors/notion/types/block.py +96 -0
  127. unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +63 -0
  128. unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +40 -0
  129. unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
  130. unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
  131. unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +131 -0
  132. unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +23 -0
  133. unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +23 -0
  134. unstructured_ingest/processes/connectors/notion/types/blocks/code.py +43 -0
  135. unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +35 -0
  136. unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +22 -0
  137. unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +36 -0
  138. unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +23 -0
  139. unstructured_ingest/processes/connectors/notion/types/blocks/file.py +49 -0
  140. unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +37 -0
  141. unstructured_ingest/processes/connectors/notion/types/blocks/image.py +21 -0
  142. unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +24 -0
  143. unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
  144. unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
  145. unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +31 -0
  146. unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +49 -0
  147. unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +37 -0
  148. unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
  149. unstructured_ingest/processes/connectors/notion/types/blocks/table.py +60 -0
  150. unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
  151. unstructured_ingest/processes/connectors/notion/types/blocks/template.py +30 -0
  152. unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +42 -0
  153. unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +37 -0
  154. unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +20 -0
  155. unstructured_ingest/processes/connectors/notion/types/blocks/video.py +22 -0
  156. unstructured_ingest/processes/connectors/notion/types/database.py +73 -0
  157. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +125 -0
  158. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +39 -0
  159. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +36 -0
  160. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +35 -0
  161. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +42 -0
  162. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +37 -0
  163. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +38 -0
  164. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +50 -0
  165. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
  166. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +35 -0
  167. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +74 -0
  168. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +50 -0
  169. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +42 -0
  170. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +37 -0
  171. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +68 -0
  172. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +44 -0
  173. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +57 -0
  174. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +70 -0
  175. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +82 -0
  176. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +38 -0
  177. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +51 -0
  178. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +38 -0
  179. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +79 -0
  180. unstructured_ingest/processes/connectors/notion/types/date.py +29 -0
  181. unstructured_ingest/processes/connectors/notion/types/file.py +54 -0
  182. unstructured_ingest/processes/connectors/notion/types/page.py +52 -0
  183. unstructured_ingest/processes/connectors/notion/types/parent.py +66 -0
  184. unstructured_ingest/processes/connectors/notion/types/rich_text.py +189 -0
  185. unstructured_ingest/processes/connectors/notion/types/user.py +83 -0
  186. unstructured_ingest/processes/connectors/onedrive.py +485 -0
  187. unstructured_ingest/processes/connectors/outlook.py +242 -0
  188. unstructured_ingest/processes/connectors/pinecone.py +400 -0
  189. unstructured_ingest/processes/connectors/qdrant/__init__.py +16 -0
  190. unstructured_ingest/processes/connectors/qdrant/cloud.py +59 -0
  191. unstructured_ingest/processes/connectors/qdrant/local.py +58 -0
  192. unstructured_ingest/processes/connectors/qdrant/qdrant.py +163 -0
  193. unstructured_ingest/processes/connectors/qdrant/server.py +60 -0
  194. unstructured_ingest/processes/connectors/redisdb.py +214 -0
  195. unstructured_ingest/processes/connectors/salesforce.py +307 -0
  196. unstructured_ingest/processes/connectors/sharepoint.py +282 -0
  197. unstructured_ingest/processes/connectors/slack.py +249 -0
  198. unstructured_ingest/processes/connectors/sql/__init__.py +41 -0
  199. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +228 -0
  200. unstructured_ingest/processes/connectors/sql/postgres.py +168 -0
  201. unstructured_ingest/processes/connectors/sql/singlestore.py +176 -0
  202. unstructured_ingest/processes/connectors/sql/snowflake.py +298 -0
  203. unstructured_ingest/processes/connectors/sql/sql.py +456 -0
  204. unstructured_ingest/processes/connectors/sql/sqlite.py +179 -0
  205. unstructured_ingest/processes/connectors/sql/teradata.py +254 -0
  206. unstructured_ingest/processes/connectors/sql/vastdb.py +263 -0
  207. unstructured_ingest/processes/connectors/utils.py +60 -0
  208. unstructured_ingest/processes/connectors/vectara.py +348 -0
  209. unstructured_ingest/processes/connectors/weaviate/__init__.py +22 -0
  210. unstructured_ingest/processes/connectors/weaviate/cloud.py +166 -0
  211. unstructured_ingest/processes/connectors/weaviate/embedded.py +90 -0
  212. unstructured_ingest/processes/connectors/weaviate/local.py +73 -0
  213. unstructured_ingest/processes/connectors/weaviate/weaviate.py +337 -0
  214. unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  215. unstructured_ingest/processes/connectors/zendesk/client.py +314 -0
  216. unstructured_ingest/processes/connectors/zendesk/zendesk.py +241 -0
  217. unstructured_ingest/processes/embedder.py +203 -0
  218. unstructured_ingest/processes/filter.py +60 -0
  219. unstructured_ingest/processes/partitioner.py +233 -0
  220. unstructured_ingest/processes/uncompress.py +61 -0
  221. unstructured_ingest/processes/utils/__init__.py +8 -0
  222. unstructured_ingest/processes/utils/blob_storage.py +32 -0
  223. unstructured_ingest/processes/utils/logging/connector.py +365 -0
  224. unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
  225. unstructured_ingest/unstructured_api.py +140 -0
  226. unstructured_ingest/utils/__init__.py +5 -0
  227. unstructured_ingest/utils/chunking.py +56 -0
  228. unstructured_ingest/utils/compression.py +72 -0
  229. unstructured_ingest/utils/constants.py +2 -0
  230. unstructured_ingest/utils/data_prep.py +216 -0
  231. unstructured_ingest/utils/dep_check.py +78 -0
  232. unstructured_ingest/utils/filesystem.py +27 -0
  233. unstructured_ingest/utils/html.py +174 -0
  234. unstructured_ingest/utils/ndjson.py +52 -0
  235. unstructured_ingest/utils/pydantic_models.py +52 -0
  236. unstructured_ingest/utils/string_and_date_utils.py +74 -0
  237. unstructured_ingest/utils/table.py +80 -0
  238. unstructured_ingest/utils/tls.py +15 -0
  239. unstructured_ingest-1.2.32.dist-info/METADATA +235 -0
  240. unstructured_ingest-1.2.32.dist-info/RECORD +243 -0
  241. unstructured_ingest-1.2.32.dist-info/WHEEL +4 -0
  242. unstructured_ingest-1.2.32.dist-info/entry_points.txt +2 -0
  243. unstructured_ingest-1.2.32.dist-info/licenses/LICENSE.md +201 -0
@@ -0,0 +1,52 @@
1
+ # https://developers.notion.com/reference/page
2
+ from dataclasses import dataclass, fields
3
+ from typing import Optional
4
+
5
+ from unstructured_ingest.processes.connectors.notion.interfaces import FromJSONMixin
6
+ from unstructured_ingest.processes.connectors.notion.types.file import FileObject
7
+ from unstructured_ingest.processes.connectors.notion.types.parent import Parent
8
+ from unstructured_ingest.processes.connectors.notion.types.user import PartialUser
9
+
10
+
11
@dataclass
class Page(FromJSONMixin):
    """Typed view of a Notion page payload.

    Nested payloads (``created_by``, ``last_edited_by``, ``parent``, ``icon``
    and ``cover``) are converted to their own dataclasses by ``from_dict``;
    all remaining fields are stored exactly as they appear in the payload.
    """

    id: str
    created_time: str
    created_by: PartialUser
    last_edited_time: str
    last_edited_by: PartialUser
    archived: bool
    in_trash: bool
    properties: dict
    parent: Parent
    url: str
    # Notion documents this as null for pages that are not published.
    public_url: Optional[str]
    request_id: Optional[str] = None
    object: str = "page"
    icon: Optional[FileObject] = None
    cover: Optional[FileObject] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``Page`` from a raw page dictionary.

        Args:
            data: Raw page payload. It is shallow-copied first, so the
                caller's dictionary is never modified.

        Returns:
            A ``Page`` whose nested members have been parsed.

        Raises:
            KeyError: If ``created_by``, ``last_edited_by`` or ``parent`` is
                missing from ``data``.
            TypeError: If a field without a default is missing from ``data``.
        """
        data = data.copy()  # Don't modify the original
        created_by = data.pop("created_by")
        last_edited_by = data.pop("last_edited_by")
        parent = data.pop("parent")
        # ``icon`` and ``cover`` default to None on the dataclass, so a payload
        # that omits them must not fail with KeyError.
        icon = data.pop("icon", None)
        cover = data.pop("cover", None)

        # Keep only keys that are dataclass fields so unexpected keys in the
        # payload do not break construction. The name set is built once
        # rather than once per item.
        known_fields = {f.name for f in fields(cls)}
        filtered_data = {k: v for k, v in data.items() if k in known_fields}

        return cls(
            created_by=PartialUser.from_dict(created_by),
            last_edited_by=PartialUser.from_dict(last_edited_by),
            icon=FileObject.from_dict(icon) if icon else None,
            cover=FileObject.from_dict(cover) if cover else None,
            parent=Parent.from_dict(parent),
            **filtered_data,
        )
@@ -0,0 +1,66 @@
1
+ # https://developers.notion.com/reference/parent-object
2
+ from dataclasses import dataclass
3
+
4
+ from unstructured_ingest.processes.connectors.notion.interfaces import FromJSONMixin
5
+
6
+
7
+ # https://developers.notion.com/reference/parent-object#database-parent
8
@dataclass
class DatabaseParent(FromJSONMixin):
    """Parent reference whose ``type`` is ``"database_id"``."""

    database_id: str
    type: str = "database_id"

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance from ``data``; only ``data["database_id"]`` is read."""
        parent_id = data["database_id"]
        return cls(database_id=parent_id)
16
+
17
+
18
+ # https://developers.notion.com/reference/parent-object#page-parent
19
@dataclass
class PageParent(FromJSONMixin):
    """Parent reference whose ``type`` is ``"page_id"``."""

    page_id: str
    type: str = "page_id"

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance from ``data``; only ``data["page_id"]`` is read."""
        parent_id = data["page_id"]
        return cls(page_id=parent_id)
27
+
28
+
29
+ # https://developers.notion.com/reference/parent-object#workspace-parent
30
@dataclass
class WorkspaceParent(FromJSONMixin):
    """Parent reference whose ``type`` is ``"workspace"``."""

    type: str = "workspace"
    workspace: bool = True

    @classmethod
    def from_dict(cls, data: dict):
        """Return an instance with default field values.

        ``data`` is accepted for interface parity with the other parent
        classes but none of its contents are read.
        """
        instance = cls()
        return instance
38
+
39
+
40
+ # https://developers.notion.com/reference/parent-object#block-parent
41
@dataclass
class BlockParent(FromJSONMixin):
    """Parent reference whose ``type`` is ``"block_id"``."""

    block_id: str
    type: str = "block_id"

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance from ``data``; only ``data["block_id"]`` is read."""
        parent_id = data["block_id"]
        return cls(block_id=parent_id)
49
+
50
+
51
@dataclass
class Parent(FromJSONMixin):
    """Entry point that picks the concrete parent class for a payload."""

    block_id: str
    type: str = "block_id"

    @classmethod
    def from_dict(cls, data: dict):
        """Dispatch on ``data["type"]`` to the matching parent class.

        Returns:
            A ``DatabaseParent``, ``PageParent``, ``WorkspaceParent`` or
            ``BlockParent``; ``None`` when the type is none of those four.

        Raises:
            KeyError: If ``data`` has no ``"type"`` key.
        """
        kind = data["type"]
        if kind == "database_id":
            return DatabaseParent.from_dict(data)
        if kind == "page_id":
            return PageParent.from_dict(data)
        if kind == "workspace":
            return WorkspaceParent.from_dict(data)
        if kind == "block_id":
            return BlockParent.from_dict(data)
        # Unrecognised parent types fall through without raising.
        return None
@@ -0,0 +1,189 @@
1
+ # https://developers.notion.com/reference/rich-text
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.attributes import Href, Style
6
+ from htmlBuilder.tags import A, B, Code, Div, HtmlTag, I, S, Span, U
7
+ from htmlBuilder.tags import Text as HtmlText
8
+
9
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
10
+ FromJSONMixin,
11
+ GetHTMLMixin,
12
+ )
13
+ from unstructured_ingest.processes.connectors.notion.types.date import Date
14
+ from unstructured_ingest.processes.connectors.notion.types.user import People
15
+
16
+
17
@dataclass
class Annotations(FromJSONMixin):
    """Styling flags attached to a rich-text fragment."""

    bold: bool
    code: bool
    italic: bool
    strikethrough: bool
    underline: bool
    color: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)
29
+
30
+
31
@dataclass
class Equation(FromJSONMixin, GetHTMLMixin):
    """Rich-text equation holding its expression string."""

    expression: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return the expression wrapped in a ``Code`` tag, or ``None`` if it is empty."""
        if not self.expression:
            return None
        return Code([], self.expression)
41
+
42
+
43
@dataclass
class MentionDatabase(FromJSONMixin, GetHTMLMixin):
    """Mention payload that refers to a database by id."""

    id: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return the id wrapped in a ``Div`` tag, or ``None`` if it is empty."""
        if not self.id:
            return None
        return Div([], self.id)
53
+
54
+
55
@dataclass
class MentionLinkPreview(FromJSONMixin, GetHTMLMixin):
    """Mention payload that carries a link-preview URL."""

    url: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return an anchor whose href and text are both the URL, or ``None`` if it is empty."""
        if not self.url:
            return None
        link_attrs = [Href(self.url)]
        return A(link_attrs, self.url)
65
+
66
+
67
@dataclass
class MentionPage(FromJSONMixin, GetHTMLMixin):
    """Mention payload that refers to a page by id."""

    id: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return the id wrapped in a ``Div`` tag, or ``None`` if it is empty."""
        if not self.id:
            return None
        return Div([], self.id)
77
+
78
+
79
@dataclass
class MentionTemplate(FromJSONMixin):
    """Template mention: either a date placeholder or a user placeholder.

    Both fields default to ``None`` because a payload is expected to carry
    only one of them.
    """

    template_mention_date: Optional[str] = None
    template_mention_user: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``MentionTemplate`` from a raw template-mention dictionary.

        Only the two ``template_mention_*`` keys are read. Per the Notion
        rich-text reference, the payload also carries a ``type`` discriminator
        and includes just one of the two value keys, so unpacking it with
        ``cls(**data)`` would raise ``TypeError``.

        Args:
            data: Raw template-mention payload.

        Returns:
            An instance with whichever placeholder the payload provided; the
            other field is ``None``.
        """
        return cls(
            template_mention_date=data.get("template_mention_date"),
            template_mention_user=data.get("template_mention_user"),
        )
87
+
88
+
89
@dataclass
class Mention(FromJSONMixin, GetHTMLMixin):
    """Rich-text mention; ``type`` names which optional member is populated."""

    type: str
    database: Optional[MentionDatabase] = None
    date: Optional[Date] = None
    link_preview: Optional[MentionLinkPreview] = None
    page: Optional[MentionPage] = None
    template_mention: Optional[MentionTemplate] = None
    user: Optional[People] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Parse the sub-payload selected by ``data["type"]``.

        For an unrecognised type the result has only ``type`` set.

        Raises:
            KeyError: If ``"type"`` is missing, or the key named by a
                recognised type is missing.
        """
        kind = data["type"]
        parsed = cls(type=kind)
        if kind == "date":
            parsed.date = Date.from_dict(data["date"])
        elif kind == "database":
            parsed.database = MentionDatabase.from_dict(data["database"])
        elif kind == "link_preview":
            parsed.link_preview = MentionLinkPreview.from_dict(data["link_preview"])
        elif kind == "page":
            parsed.page = MentionPage.from_dict(data["page"])
        elif kind == "template_mention":
            parsed.template_mention = MentionTemplate.from_dict(data["template_mention"])
        elif kind == "user":
            parsed.user = People.from_dict(data["user"])
        return parsed

    def get_html(self) -> Optional[HtmlTag]:
        """Render the member selected by ``self.type``.

        Returns ``None`` for ``template_mention``, for any unrecognised type,
        and when the selected member is unset.
        """
        renderable = (
            ("date", self.date),
            ("database", self.database),
            ("link_preview", self.link_preview),
            ("page", self.page),
            ("user", self.user),
        )
        for kind, member in renderable:
            if self.type == kind:
                return member.get_html() if member else None
        return None
131
+
132
+
133
@dataclass
class Text(FromJSONMixin):
    """Plain text content of a rich-text fragment plus its optional link."""

    content: str
    link: Optional[dict]

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)
141
+
142
+
143
@dataclass
class RichText(FromJSONMixin, GetHTMLMixin):
    """One rich-text fragment: plain text, styling, and a type-specific payload.

    ``type`` selects which of ``text``, ``mention`` or ``equation`` is parsed
    into its dataclass by ``from_dict``.
    """

    type: str
    plain_text: str
    annotations: Optional[Annotations] = None
    href: Optional[str] = None
    text: Optional[Text] = None
    mention: Optional[Mention] = None
    equation: Optional[Equation] = None

    def get_html(self) -> Optional[HtmlTag]:
        """Render ``plain_text`` wrapped in tags for the link and annotations.

        Wrapping order, innermost first: link, bold, code, italic,
        strikethrough, underline. A non-default colour is added as a
        ``color:`` style on the outermost tag.
        """
        text = HtmlText(self.plain_text)
        if self.href:
            text = A([Href(self.href)], text)
        if self.annotations:
            annotations = self.annotations
            if annotations.bold:
                text = B([], text)
            if annotations.code:
                text = Code([], text)
            if annotations.italic:
                text = I([], text)
            if annotations.strikethrough:
                text = S([], text)
            if annotations.underline:
                text = U([], text)
            if annotations.color and annotations.color != "default":
                # If nothing has wrapped the text yet, wrap it in a Span so
                # there is a tag to attach the style attribute to.
                if isinstance(text, HtmlText):
                    text = Span([], text)
                text.attributes.append(Style(f"color:{annotations.color}"))
        return text

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``RichText`` from a raw rich-text dictionary.

        Args:
            data: Raw rich-text payload. It is shallow-copied first, so the
                caller's dictionary keeps its ``annotations`` entry.

        Returns:
            A ``RichText`` with ``annotations`` parsed when present and the
            payload selected by ``type`` parsed into its dataclass.

        Raises:
            KeyError: If ``"type"`` is missing, or the key named by a
                recognised type is missing.
            TypeError: If ``data`` holds keys that are not dataclass fields.
        """
        t = data["type"]
        payload = data.copy()  # Don't modify the original
        # ``annotations`` is optional on the dataclass, so tolerate its absence.
        raw_annotations = payload.pop("annotations", None)
        rich_text = cls(
            annotations=(
                Annotations.from_dict(raw_annotations) if raw_annotations is not None else None
            ),
            **payload,
        )
        # Replace the raw dict stored by ``**payload`` with its parsed form.
        if t == "text":
            rich_text.text = Text.from_dict(payload["text"])
        elif t == "mention":
            rich_text.mention = Mention.from_dict(payload["mention"])
        elif t == "equation":
            rich_text.equation = Equation.from_dict(payload["equation"])

        return rich_text
@@ -0,0 +1,83 @@
1
+ # https://developers.notion.com/reference/user
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.attributes import Href
6
+ from htmlBuilder.tags import A, Div, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ FromJSONMixin,
10
+ GetHTMLMixin,
11
+ )
12
+
13
+
14
@dataclass
class PartialUser(FromJSONMixin):
    """Minimal user reference holding just the user id."""

    id: str
    object: str = "user"

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance from ``data``; only ``data["id"]`` is read."""
        user_id = data["id"]
        return cls(id=user_id)
22
+
23
+
24
@dataclass
class User(FromJSONMixin, GetHTMLMixin):
    """Notion user with optional display name and avatar."""

    # The sibling ``PartialUser`` stores the string "user" in this field, so
    # the annotation is ``str`` rather than ``dict``.
    object: str
    id: str
    type: Optional[str] = None
    name: Optional[str] = None
    avatar_url: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from a raw user dictionary.

        Keys that are not dataclass fields of ``cls`` are ignored, so
        type-specific payload keys (for example ``bot`` on a class that does
        not declare it) no longer raise ``TypeError``. Subclasses that
        declare extra fields still receive them because the field list is
        taken from ``cls``.

        Raises:
            TypeError: If a field without a default is missing from ``data``.
        """
        # Local import: the module-level import only brings in ``dataclass``
        # and ``field``.
        from dataclasses import fields

        known_fields = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in data.items() if k in known_fields})

    def get_text(self) -> Optional[str]:
        """Return the name, as a Markdown-style link when an avatar URL is set."""
        # NOTE(review): with an avatar URL but no name this yields
        # "[None](...)"; left unchanged pending confirmation of intent.
        text = self.name
        if self.avatar_url:
            text = f"[{text}]({self.avatar_url})"
        return text

    def get_html(self) -> Optional[HtmlTag]:
        """Return an anchor (name + avatar), a ``Div`` (name only), or an empty ``Div``."""
        if self.avatar_url and self.name:
            return A([Href(self.avatar_url)], self.name)
        elif self.name:
            return Div([], self.name)
        else:
            return Div([], "")
49
+
50
+
51
@dataclass
class People(User):
    """``User`` variant with an extra ``person`` dictionary (empty by default)."""

    person: dict = field(default_factory=dict)
54
+
55
+
56
@dataclass
class Bots(FromJSONMixin, GetHTMLMixin):
    """Bot user record with optional display name and avatar."""

    object: dict
    id: str
    bot: dict
    owner: dict
    type: str
    workspace_name: str
    name: Optional[str] = None
    avatar_url: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``, whose keys must match the field names exactly."""
        return cls(**data)

    def get_text(self) -> Optional[str]:
        """Return the name, as a Markdown-style link when an avatar URL is set."""
        if self.avatar_url:
            return f"[{self.name}]({self.avatar_url})"
        return self.name

    def get_html(self) -> Optional[HtmlTag]:
        """Return an anchor (name + avatar), a ``Div`` (name only), or an empty ``Div``."""
        if not self.name:
            return Div([], "")
        if self.avatar_url:
            return A([Href(self.avatar_url)], self.name)
        return Div([], self.name)