unstructured-ingest 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (243)
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/README.md +28 -0
  4. unstructured_ingest/cli/__init__.py +0 -0
  5. unstructured_ingest/cli/base/__init__.py +4 -0
  6. unstructured_ingest/cli/base/cmd.py +269 -0
  7. unstructured_ingest/cli/base/dest.py +84 -0
  8. unstructured_ingest/cli/base/importer.py +34 -0
  9. unstructured_ingest/cli/base/src.py +75 -0
  10. unstructured_ingest/cli/cli.py +24 -0
  11. unstructured_ingest/cli/cmds.py +14 -0
  12. unstructured_ingest/cli/utils/__init__.py +0 -0
  13. unstructured_ingest/cli/utils/click.py +237 -0
  14. unstructured_ingest/cli/utils/model_conversion.py +222 -0
  15. unstructured_ingest/data_types/__init__.py +0 -0
  16. unstructured_ingest/data_types/entities.py +17 -0
  17. unstructured_ingest/data_types/file_data.py +116 -0
  18. unstructured_ingest/embed/__init__.py +0 -0
  19. unstructured_ingest/embed/azure_openai.py +63 -0
  20. unstructured_ingest/embed/bedrock.py +323 -0
  21. unstructured_ingest/embed/huggingface.py +69 -0
  22. unstructured_ingest/embed/interfaces.py +146 -0
  23. unstructured_ingest/embed/mixedbreadai.py +134 -0
  24. unstructured_ingest/embed/octoai.py +133 -0
  25. unstructured_ingest/embed/openai.py +142 -0
  26. unstructured_ingest/embed/togetherai.py +116 -0
  27. unstructured_ingest/embed/vertexai.py +109 -0
  28. unstructured_ingest/embed/voyageai.py +130 -0
  29. unstructured_ingest/error.py +156 -0
  30. unstructured_ingest/errors_v2.py +156 -0
  31. unstructured_ingest/interfaces/__init__.py +27 -0
  32. unstructured_ingest/interfaces/connector.py +56 -0
  33. unstructured_ingest/interfaces/downloader.py +90 -0
  34. unstructured_ingest/interfaces/indexer.py +29 -0
  35. unstructured_ingest/interfaces/process.py +22 -0
  36. unstructured_ingest/interfaces/processor.py +88 -0
  37. unstructured_ingest/interfaces/upload_stager.py +89 -0
  38. unstructured_ingest/interfaces/uploader.py +67 -0
  39. unstructured_ingest/logger.py +39 -0
  40. unstructured_ingest/main.py +11 -0
  41. unstructured_ingest/otel.py +128 -0
  42. unstructured_ingest/pipeline/__init__.py +0 -0
  43. unstructured_ingest/pipeline/interfaces.py +211 -0
  44. unstructured_ingest/pipeline/otel.py +32 -0
  45. unstructured_ingest/pipeline/pipeline.py +408 -0
  46. unstructured_ingest/pipeline/steps/__init__.py +0 -0
  47. unstructured_ingest/pipeline/steps/chunk.py +78 -0
  48. unstructured_ingest/pipeline/steps/download.py +206 -0
  49. unstructured_ingest/pipeline/steps/embed.py +77 -0
  50. unstructured_ingest/pipeline/steps/filter.py +35 -0
  51. unstructured_ingest/pipeline/steps/index.py +86 -0
  52. unstructured_ingest/pipeline/steps/partition.py +77 -0
  53. unstructured_ingest/pipeline/steps/stage.py +65 -0
  54. unstructured_ingest/pipeline/steps/uncompress.py +50 -0
  55. unstructured_ingest/pipeline/steps/upload.py +58 -0
  56. unstructured_ingest/processes/__init__.py +18 -0
  57. unstructured_ingest/processes/chunker.py +131 -0
  58. unstructured_ingest/processes/connector_registry.py +69 -0
  59. unstructured_ingest/processes/connectors/__init__.py +129 -0
  60. unstructured_ingest/processes/connectors/airtable.py +238 -0
  61. unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  62. unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
  63. unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
  64. unstructured_ingest/processes/connectors/astradb.py +592 -0
  65. unstructured_ingest/processes/connectors/azure_ai_search.py +275 -0
  66. unstructured_ingest/processes/connectors/chroma.py +193 -0
  67. unstructured_ingest/processes/connectors/confluence.py +527 -0
  68. unstructured_ingest/processes/connectors/couchbase.py +336 -0
  69. unstructured_ingest/processes/connectors/databricks/__init__.py +58 -0
  70. unstructured_ingest/processes/connectors/databricks/volumes.py +233 -0
  71. unstructured_ingest/processes/connectors/databricks/volumes_aws.py +93 -0
  72. unstructured_ingest/processes/connectors/databricks/volumes_azure.py +108 -0
  73. unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +91 -0
  74. unstructured_ingest/processes/connectors/databricks/volumes_native.py +92 -0
  75. unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
  76. unstructured_ingest/processes/connectors/delta_table.py +310 -0
  77. unstructured_ingest/processes/connectors/discord.py +161 -0
  78. unstructured_ingest/processes/connectors/duckdb/__init__.py +15 -0
  79. unstructured_ingest/processes/connectors/duckdb/base.py +103 -0
  80. unstructured_ingest/processes/connectors/duckdb/duckdb.py +130 -0
  81. unstructured_ingest/processes/connectors/duckdb/motherduck.py +130 -0
  82. unstructured_ingest/processes/connectors/elasticsearch/__init__.py +19 -0
  83. unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +478 -0
  84. unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
  85. unstructured_ingest/processes/connectors/fsspec/__init__.py +37 -0
  86. unstructured_ingest/processes/connectors/fsspec/azure.py +203 -0
  87. unstructured_ingest/processes/connectors/fsspec/box.py +176 -0
  88. unstructured_ingest/processes/connectors/fsspec/dropbox.py +238 -0
  89. unstructured_ingest/processes/connectors/fsspec/fsspec.py +475 -0
  90. unstructured_ingest/processes/connectors/fsspec/gcs.py +203 -0
  91. unstructured_ingest/processes/connectors/fsspec/s3.py +253 -0
  92. unstructured_ingest/processes/connectors/fsspec/sftp.py +177 -0
  93. unstructured_ingest/processes/connectors/fsspec/utils.py +17 -0
  94. unstructured_ingest/processes/connectors/github.py +226 -0
  95. unstructured_ingest/processes/connectors/gitlab.py +270 -0
  96. unstructured_ingest/processes/connectors/google_drive.py +848 -0
  97. unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +10 -0
  98. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +367 -0
  99. unstructured_ingest/processes/connectors/jira.py +522 -0
  100. unstructured_ingest/processes/connectors/kafka/__init__.py +17 -0
  101. unstructured_ingest/processes/connectors/kafka/cloud.py +121 -0
  102. unstructured_ingest/processes/connectors/kafka/kafka.py +275 -0
  103. unstructured_ingest/processes/connectors/kafka/local.py +103 -0
  104. unstructured_ingest/processes/connectors/kdbai.py +156 -0
  105. unstructured_ingest/processes/connectors/lancedb/__init__.py +30 -0
  106. unstructured_ingest/processes/connectors/lancedb/aws.py +43 -0
  107. unstructured_ingest/processes/connectors/lancedb/azure.py +43 -0
  108. unstructured_ingest/processes/connectors/lancedb/cloud.py +42 -0
  109. unstructured_ingest/processes/connectors/lancedb/gcp.py +44 -0
  110. unstructured_ingest/processes/connectors/lancedb/lancedb.py +181 -0
  111. unstructured_ingest/processes/connectors/lancedb/local.py +44 -0
  112. unstructured_ingest/processes/connectors/local.py +227 -0
  113. unstructured_ingest/processes/connectors/milvus.py +311 -0
  114. unstructured_ingest/processes/connectors/mongodb.py +389 -0
  115. unstructured_ingest/processes/connectors/neo4j.py +534 -0
  116. unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  117. unstructured_ingest/processes/connectors/notion/client.py +349 -0
  118. unstructured_ingest/processes/connectors/notion/connector.py +350 -0
  119. unstructured_ingest/processes/connectors/notion/helpers.py +448 -0
  120. unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +3 -0
  121. unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +102 -0
  122. unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +126 -0
  123. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  124. unstructured_ingest/processes/connectors/notion/interfaces.py +32 -0
  125. unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  126. unstructured_ingest/processes/connectors/notion/types/block.py +96 -0
  127. unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +63 -0
  128. unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +40 -0
  129. unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
  130. unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
  131. unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +131 -0
  132. unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +23 -0
  133. unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +23 -0
  134. unstructured_ingest/processes/connectors/notion/types/blocks/code.py +43 -0
  135. unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +35 -0
  136. unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +22 -0
  137. unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +36 -0
  138. unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +23 -0
  139. unstructured_ingest/processes/connectors/notion/types/blocks/file.py +49 -0
  140. unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +37 -0
  141. unstructured_ingest/processes/connectors/notion/types/blocks/image.py +21 -0
  142. unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +24 -0
  143. unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
  144. unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
  145. unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +31 -0
  146. unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +49 -0
  147. unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +37 -0
  148. unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
  149. unstructured_ingest/processes/connectors/notion/types/blocks/table.py +60 -0
  150. unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
  151. unstructured_ingest/processes/connectors/notion/types/blocks/template.py +30 -0
  152. unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +42 -0
  153. unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +37 -0
  154. unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +20 -0
  155. unstructured_ingest/processes/connectors/notion/types/blocks/video.py +22 -0
  156. unstructured_ingest/processes/connectors/notion/types/database.py +73 -0
  157. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +125 -0
  158. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +39 -0
  159. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +36 -0
  160. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +35 -0
  161. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +42 -0
  162. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +37 -0
  163. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +38 -0
  164. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +50 -0
  165. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
  166. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +35 -0
  167. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +74 -0
  168. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +50 -0
  169. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +42 -0
  170. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +37 -0
  171. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +68 -0
  172. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +44 -0
  173. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +57 -0
  174. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +70 -0
  175. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +82 -0
  176. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +38 -0
  177. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +51 -0
  178. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +38 -0
  179. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +79 -0
  180. unstructured_ingest/processes/connectors/notion/types/date.py +29 -0
  181. unstructured_ingest/processes/connectors/notion/types/file.py +54 -0
  182. unstructured_ingest/processes/connectors/notion/types/page.py +52 -0
  183. unstructured_ingest/processes/connectors/notion/types/parent.py +66 -0
  184. unstructured_ingest/processes/connectors/notion/types/rich_text.py +189 -0
  185. unstructured_ingest/processes/connectors/notion/types/user.py +83 -0
  186. unstructured_ingest/processes/connectors/onedrive.py +485 -0
  187. unstructured_ingest/processes/connectors/outlook.py +242 -0
  188. unstructured_ingest/processes/connectors/pinecone.py +400 -0
  189. unstructured_ingest/processes/connectors/qdrant/__init__.py +16 -0
  190. unstructured_ingest/processes/connectors/qdrant/cloud.py +59 -0
  191. unstructured_ingest/processes/connectors/qdrant/local.py +58 -0
  192. unstructured_ingest/processes/connectors/qdrant/qdrant.py +163 -0
  193. unstructured_ingest/processes/connectors/qdrant/server.py +60 -0
  194. unstructured_ingest/processes/connectors/redisdb.py +214 -0
  195. unstructured_ingest/processes/connectors/salesforce.py +307 -0
  196. unstructured_ingest/processes/connectors/sharepoint.py +282 -0
  197. unstructured_ingest/processes/connectors/slack.py +249 -0
  198. unstructured_ingest/processes/connectors/sql/__init__.py +41 -0
  199. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +228 -0
  200. unstructured_ingest/processes/connectors/sql/postgres.py +168 -0
  201. unstructured_ingest/processes/connectors/sql/singlestore.py +176 -0
  202. unstructured_ingest/processes/connectors/sql/snowflake.py +298 -0
  203. unstructured_ingest/processes/connectors/sql/sql.py +456 -0
  204. unstructured_ingest/processes/connectors/sql/sqlite.py +179 -0
  205. unstructured_ingest/processes/connectors/sql/teradata.py +254 -0
  206. unstructured_ingest/processes/connectors/sql/vastdb.py +263 -0
  207. unstructured_ingest/processes/connectors/utils.py +60 -0
  208. unstructured_ingest/processes/connectors/vectara.py +348 -0
  209. unstructured_ingest/processes/connectors/weaviate/__init__.py +22 -0
  210. unstructured_ingest/processes/connectors/weaviate/cloud.py +166 -0
  211. unstructured_ingest/processes/connectors/weaviate/embedded.py +90 -0
  212. unstructured_ingest/processes/connectors/weaviate/local.py +73 -0
  213. unstructured_ingest/processes/connectors/weaviate/weaviate.py +337 -0
  214. unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  215. unstructured_ingest/processes/connectors/zendesk/client.py +314 -0
  216. unstructured_ingest/processes/connectors/zendesk/zendesk.py +241 -0
  217. unstructured_ingest/processes/embedder.py +203 -0
  218. unstructured_ingest/processes/filter.py +60 -0
  219. unstructured_ingest/processes/partitioner.py +233 -0
  220. unstructured_ingest/processes/uncompress.py +61 -0
  221. unstructured_ingest/processes/utils/__init__.py +8 -0
  222. unstructured_ingest/processes/utils/blob_storage.py +32 -0
  223. unstructured_ingest/processes/utils/logging/connector.py +365 -0
  224. unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
  225. unstructured_ingest/unstructured_api.py +140 -0
  226. unstructured_ingest/utils/__init__.py +5 -0
  227. unstructured_ingest/utils/chunking.py +56 -0
  228. unstructured_ingest/utils/compression.py +72 -0
  229. unstructured_ingest/utils/constants.py +2 -0
  230. unstructured_ingest/utils/data_prep.py +216 -0
  231. unstructured_ingest/utils/dep_check.py +78 -0
  232. unstructured_ingest/utils/filesystem.py +27 -0
  233. unstructured_ingest/utils/html.py +174 -0
  234. unstructured_ingest/utils/ndjson.py +52 -0
  235. unstructured_ingest/utils/pydantic_models.py +52 -0
  236. unstructured_ingest/utils/string_and_date_utils.py +74 -0
  237. unstructured_ingest/utils/table.py +80 -0
  238. unstructured_ingest/utils/tls.py +15 -0
  239. unstructured_ingest-1.2.32.dist-info/METADATA +235 -0
  240. unstructured_ingest-1.2.32.dist-info/RECORD +243 -0
  241. unstructured_ingest-1.2.32.dist-info/WHEEL +4 -0
  242. unstructured_ingest-1.2.32.dist-info/entry_points.txt +2 -0
  243. unstructured_ingest-1.2.32.dist-info/licenses/LICENSE.md +201 -0
@@ -0,0 +1,73 @@
1
+ # https://developers.notion.com/reference/database
2
+ from dataclasses import dataclass, field
3
+ from typing import Dict, List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBPropertyBase,
9
+ FromJSONMixin,
10
+ GetHTMLMixin,
11
+ )
12
+ from unstructured_ingest.processes.connectors.notion.types.database_properties import (
13
+ map_properties,
14
+ )
15
+ from unstructured_ingest.processes.connectors.notion.types.file import FileObject
16
+ from unstructured_ingest.processes.connectors.notion.types.parent import Parent
17
+ from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
18
+ from unstructured_ingest.processes.connectors.notion.types.user import PartialUser
19
+
20
+
21
@dataclass
class Database(FromJSONMixin, GetHTMLMixin):
    """Typed representation of a Notion database object.

    Nested payloads (users, parent, rich text, files, property schemas) are
    converted into their dedicated types by :meth:`from_dict`; the remaining
    scalar fields are stored as received.
    """

    id: str
    created_time: str
    created_by: PartialUser
    last_edited_time: str
    last_edited_by: PartialUser
    archived: bool
    in_trash: bool
    parent: Parent
    url: str
    is_inline: bool
    # The Notion API reference documents this as null when the database is
    # not published to the web, hence Optional. Still a required field.
    public_url: Optional[str]
    request_id: Optional[str] = None
    properties: Dict[str, DBPropertyBase] = field(default_factory=dict)
    title: List[RichText] = field(default_factory=list)
    description: List[RichText] = field(default_factory=list)
    icon: Optional[FileObject] = None
    cover: Optional[FileObject] = None
    object: str = "database"

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``Database`` from a decoded API payload.

        Args:
            data: Decoded JSON object. Keys handled explicitly are popped
                from it, so the dict is modified in place; every key left
                over is forwarded to the constructor as a keyword argument.

        Returns:
            The populated ``Database`` instance.

        Raises:
            KeyError: If ``created_by``, ``last_edited_by`` or ``parent`` is
                missing.
            TypeError: If a leftover key is not a dataclass field, or a
                required scalar field is missing.
        """
        # These three have no dataclass default, so they stay mandatory.
        created_by = data.pop("created_by")
        last_edited_by = data.pop("last_edited_by")
        parent = data.pop("parent")

        # The dataclass declares defaults for the fields below; honour them
        # when the key is absent (or null) instead of raising KeyError.
        icon = data.pop("icon", None)
        cover = data.pop("cover", None)
        title = data.pop("title", None) or []
        description = data.pop("description", None) or []

        database = cls(
            properties=map_properties(data.pop("properties", {})),
            created_by=PartialUser.from_dict(created_by),
            last_edited_by=PartialUser.from_dict(last_edited_by),
            icon=FileObject.from_dict(icon) if icon else None,
            cover=FileObject.from_dict(cover) if cover else None,
            parent=Parent.from_dict(parent),
            title=[RichText.from_dict(data=r) for r in title],
            description=[RichText.from_dict(data=r) for r in description],
            **data,
        )

        return database

    def get_html(self) -> Optional[HtmlTag]:
        """Render the title and description as spans inside one ``Div``.

        Returns:
            A ``Div`` holding one ``Span`` per non-empty rich-text list
            (title first, then description), or ``None`` when both are empty.
        """
        spans = [
            Span([], [rt.get_html() for rt in rich_texts])
            for rich_texts in (self.title, self.description)
            if rich_texts
        ]
        if spans:
            return Div([], spans)
        return None
@@ -0,0 +1,125 @@
1
+ from typing import Dict
2
+
3
+ from unstructured_ingest.logger import logger
4
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
5
+
6
+ from .checkbox import Checkbox, CheckboxCell
7
+ from .created_by import CreatedBy, CreatedByCell
8
+ from .created_time import CreatedTime, CreatedTimeCell
9
+ from .date import Date, DateCell
10
+ from .email import Email, EmailCell
11
+ from .files import Files, FilesCell
12
+ from .formula import Formula, FormulaCell
13
+ from .last_edited_by import LastEditedBy, LastEditedByCell
14
+ from .last_edited_time import LastEditedTime, LastEditedTimeCell
15
+ from .multiselect import MultiSelect, MultiSelectCell
16
+ from .number import Number, NumberCell
17
+ from .people import People, PeopleCell
18
+ from .phone_number import PhoneNumber, PhoneNumberCell
19
+ from .relation import Relation, RelationCell
20
+ from .rich_text import RichText, RichTextCell
21
+ from .rollup import Rollup, RollupCell
22
+ from .select import Select, SelectCell
23
+ from .status import Status, StatusCell
24
+ from .title import Title, TitleCell
25
+ from .unique_id import UniqueID, UniqueIDCell
26
+ from .url import URL, URLCell
27
+ from .verification import Verification, VerificationCell
28
+
29
+ # It's possible to add 'button' property to Notion database.
30
+ # However, current Notion API documentation doesn't mention it.
31
+ # Buttons are only functional inside Notion UI. We can simply
32
+ # ignore them so that we don't throw an error when trying
33
+ # to map 'button' properties.
34
# Property types that are skipped (with a warning) instead of being mapped.
unsupported_db_prop_types = ["button"]

# Lookup table: value of a property's "type" key -> class used to parse the
# property *schema* entry (see map_properties).
db_prop_type_mapping = {
    "checkbox": Checkbox,
    "created_by": CreatedBy,
    "created_time": CreatedTime,
    "date": Date,
    "email": Email,
    "files": Files,
    "formula": Formula,
    "last_edited_by": LastEditedBy,
    "last_edited_time": LastEditedTime,
    "multi_select": MultiSelect,
    "number": Number,
    "people": People,
    "phone_number": PhoneNumber,
    "relation": Relation,
    "rich_text": RichText,
    "rollup": Rollup,
    "select": Select,
    "status": Status,
    "title": Title,
    "unique_id": UniqueID,
    "url": URL,
    "verification": Verification,
}
60
+
61
+
62
def map_properties(props: Dict[str, dict]) -> Dict[str, DBPropertyBase]:
    """Convert raw property schema entries into typed property objects.

    Args:
        props: Mapping of property name to its raw dict; each dict must carry
            a ``"type"`` key.

    Returns:
        Mapping of property name to the parsed property object. Entries whose
        type is listed in ``unsupported_db_prop_types`` are logged and left out.

    Raises:
        KeyError: If an entry has no ``"type"`` key, its type is not in
            ``db_prop_type_mapping``, or parsing it raises ``KeyError``.
    """
    parsed = {}
    for prop_name, prop_data in props.items():
        try:
            prop_type = prop_data["type"]
            if prop_type not in unsupported_db_prop_types:
                parsed[prop_name] = db_prop_type_mapping[prop_type].from_dict(prop_data)  # type: ignore
            else:
                logger.warning(
                    f"Unsupported property type '{prop_type}' for property '{prop_name}'. Skipping."
                )
        except KeyError as ke:
            raise KeyError(
                f"failed to map to associated database property -> {prop_name}: {prop_data}"
            ) from ke

    return parsed
77
+
78
+
79
# Lookup table: value of a cell's "type" key -> class used to parse the
# property *value* (cell) found on a database row (see map_cells).
db_cell_type_mapping = {
    "checkbox": CheckboxCell,
    "created_by": CreatedByCell,
    "created_time": CreatedTimeCell,
    "date": DateCell,
    "email": EmailCell,
    "files": FilesCell,
    "formula": FormulaCell,
    "last_edited_by": LastEditedByCell,
    "last_edited_time": LastEditedTimeCell,
    "multi_select": MultiSelectCell,
    "number": NumberCell,
    "people": PeopleCell,
    "phone_number": PhoneNumberCell,
    "relation": RelationCell,
    "rich_text": RichTextCell,
    "rollup": RollupCell,
    "select": SelectCell,
    "status": StatusCell,
    "title": TitleCell,
    "unique_id": UniqueIDCell,
    "url": URLCell,
    "verification": VerificationCell,
}
103
+
104
+
105
def map_cells(props: Dict[str, dict]) -> Dict[str, DBCellBase]:
    """Convert raw property values (cells) into typed cell objects.

    Args:
        props: Mapping of property name to its raw cell dict; each dict must
            carry a ``"type"`` key.

    Returns:
        Mapping of property name to the parsed cell object. Entries whose type
        is listed in ``unsupported_db_prop_types`` are logged and left out.

    Raises:
        KeyError: If an entry has no ``"type"`` key, its type is not in
            ``db_cell_type_mapping``, or parsing it raises ``KeyError``.
    """
    parsed = {}
    for cell_name, cell_data in props.items():
        try:
            cell_type = cell_data["type"]
            if cell_type not in unsupported_db_prop_types:
                parsed[cell_name] = db_cell_type_mapping[cell_type].from_dict(cell_data)  # type: ignore
            else:
                logger.warning(
                    f"Unsupported property type '{cell_type}' for property '{cell_name}'. Skipping."
                )
        except KeyError as ke:
            raise KeyError(
                f"failed to map to associated database property -> {cell_name}: {cell_data}"
            ) from ke

    return parsed
120
+
121
+
122
# Public API of this package: the two mapping helpers.
__all__ = ["map_properties", "map_cells"]
@@ -0,0 +1,39 @@
1
+ # https://developers.notion.com/reference/property-object#checkbox
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.attributes import Checked, Type
6
+ from htmlBuilder.tags import Div, HtmlTag, Input
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
9
+
10
+
11
@dataclass
class Checkbox(DBPropertyBase):
    """Schema entry for a ``checkbox`` database property."""

    id: str
    name: str
    type: str = "checkbox"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    checkbox: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
22
+
23
+
24
@dataclass
class CheckboxCell(DBCellBase):
    """Value of a ``checkbox`` property on a database row."""

    id: str
    checkbox: bool
    name: Optional[str] = None
    type: str = "checkbox"

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Render an ``<input type="checkbox">`` (checked when set) in a ``Div``."""
        if self.checkbox:
            attributes = [Type("checkbox"), Checked("")]
        else:
            attributes = [Type("checkbox")]
        return Div([], Input(attributes))
@@ -0,0 +1,36 @@
1
+ # https://developers.notion.com/reference/property-object#created-by
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.user import People
9
+
10
+
11
@dataclass
class CreatedBy(DBPropertyBase):
    """Schema entry for a ``created_by`` database property."""

    id: str
    name: str
    type: str = "created_by"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    created_by: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
22
+
23
+
24
@dataclass
class CreatedByCell(DBCellBase):
    """Value of a ``created_by`` property on a database row."""

    id: str
    created_by: People
    type: str = "created_by"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell; ``created_by`` is popped from ``data`` and parsed as ``People``.

        Raises:
            KeyError: If ``data`` has no ``"created_by"`` key.
        """
        raw_user = data.pop("created_by")
        creator = People.from_dict(raw_user)
        return cls(created_by=creator, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return whatever the wrapped ``People`` object renders."""
        creator = self.created_by
        return creator.get_html()
@@ -0,0 +1,35 @@
1
+ # https://developers.notion.com/reference/property-object#created-time
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
@dataclass
class CreatedTime(DBPropertyBase):
    """Schema entry for a ``created_time`` database property."""

    id: str
    name: str
    type: str = "created_time"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    created_time: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
21
+
22
+
23
@dataclass
class CreatedTimeCell(DBCellBase):
    """Value of a ``created_time`` property on a database row."""

    id: str
    created_time: str
    type: str = "created_time"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Wrap the stored timestamp string in a ``Div``."""
        timestamp = self.created_time
        return Div([], timestamp)
@@ -0,0 +1,42 @@
1
+ # https://developers.notion.com/reference/property-object#date
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.date import Date as DateType
9
+
10
+
11
@dataclass
class Date(DBPropertyBase):
    """Schema entry for a ``date`` database property."""

    id: str
    name: str
    type: str = "date"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    date: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
22
+
23
+
24
@dataclass
class DateCell(DBCellBase):
    """Value of a ``date`` property on a database row.

    ``date`` is ``None`` when the cell carries no date.
    """

    id: str
    date: Optional[DateType] = None
    name: Optional[str] = None
    type: str = "date"

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell from its raw dict.

        ``"date"`` is popped from ``data`` (modifying it in place) and parsed
        into a ``DateType`` when it is present and non-empty. A missing key is
        treated like a null value, matching the field's declared default,
        instead of raising ``KeyError``.
        """
        date_data = data.pop("date", None)
        date = DateType.from_dict(date_data) if date_data else None
        return cls(date=date, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return the date's own rendering, or ``None`` when no date is set."""
        if date := self.date:
            return date.get_html()
        return None
@@ -0,0 +1,37 @@
1
+ # https://developers.notion.com/reference/property-object#email
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
@dataclass
class Email(DBPropertyBase):
    """Schema entry for an ``email`` database property."""

    id: str
    name: str
    type: str = "email"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    email: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
21
+
22
+
23
@dataclass
class EmailCell(DBCellBase):
    """Value of an ``email`` property on a database row."""

    id: str
    email: str
    name: Optional[str] = None
    type: str = "email"

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance

    def get_html(self) -> Optional[HtmlTag]:
        """Wrap the address in a ``Div``; return ``None`` when it is empty."""
        if not self.email:
            return None
        return Div([], self.email)
@@ -0,0 +1,38 @@
1
+ # https://developers.notion.com/reference/property-object#files
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.file import FileObject
9
+
10
+
11
@dataclass
class Files(DBPropertyBase):
    """Schema entry for a ``files`` database property."""

    id: str
    name: str
    type: str = "files"
    description: Optional[str] = None
    # Type-specific payload, stored as received.
    files: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
22
+
23
+
24
@dataclass
class FilesCell(DBCellBase):
    """Value of a ``files`` property on a database row."""

    id: str
    files: List[FileObject]
    type: str = "files"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell; ``files`` is popped from ``data`` (default: empty list)
        and each entry is parsed into a ``FileObject``.
        """
        raw_files = data.pop("files", [])
        file_objects = [FileObject.from_dict(raw) for raw in raw_files]
        return cls(files=file_objects, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Render every file inside one ``Div``; ``None`` when there are no files."""
        if self.files:
            return Div([], [file_obj.get_html() for file_obj in self.files])
        return None
@@ -0,0 +1,50 @@
1
+ # https://developers.notion.com/reference/property-object#formula
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
@dataclass
class FormulaProp(FromJSONMixin):
    """Configuration payload of a ``formula`` property: its expression string."""

    expression: str

    @classmethod
    def from_dict(cls, data: dict):
        """Build the payload by passing every key of ``data`` as a keyword."""
        instance = cls(**data)
        return instance
21
+
22
+
23
@dataclass
class Formula(DBPropertyBase):
    """Schema entry for a ``formula`` database property."""

    id: str
    name: str
    formula: FormulaProp
    type: str = "formula"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property; ``formula`` is popped from ``data`` (default: empty
        dict) and parsed into a ``FormulaProp``.
        """
        raw_formula = data.pop("formula", {})
        parsed_formula = FormulaProp.from_dict(raw_formula)
        return cls(formula=parsed_formula, **data)
34
+
35
+
36
@dataclass
class FormulaCell(DBCellBase):
    """Value of a ``formula`` property on a database row.

    ``formula`` is kept as the raw dict; its ``"type"`` key names the sibling
    key that holds the computed result.
    """

    id: str
    formula: dict
    type: str = "formula"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the cell by passing every key of ``data`` as a keyword."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Render the computed result as text inside a ``Div``.

        Returns:
            A ``Div`` with ``str(result)``, or ``None`` when the formula has
            no result (missing ``"type"``, missing result key, or a null
            result). Returning ``None`` avoids emitting the literal text
            "None" and matches how the other cell types report empty values.
        """
        formula = self.formula
        # dict.get(None) is simply None, so a missing "type" falls through to
        # the empty-result branch instead of raising KeyError.
        result = formula.get(formula.get("type"))
        if result is None:
            return None
        return Div([], str(result))
@@ -0,0 +1,34 @@
1
+ # https://developers.notion.com/reference/property-object#last-edited-by
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.user import People
9
+
10
+
11
@dataclass
class LastEditedBy(DBPropertyBase):
    """Database property definition that declares no fields of its own."""

    @classmethod
    def from_dict(cls, data: dict):
        """Return a new instance; the contents of ``data`` are not used."""
        return cls()

    def get_text(self) -> Optional[str]:
        """Always return ``None``."""
        return None
19
+
20
+
21
@dataclass
class LastEditedByCell(DBCellBase):
    """Cell of a ``"last_edited_by"`` property, holding a parsed ``People`` object."""

    id: str
    last_edited_by: People
    type: str = "last_edited_by"
    description: Optional[str] = None
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create a cell from ``data``.

        The ``"last_edited_by"`` entry is popped from ``data`` (an empty dict
        is used when it is absent) and parsed with ``People.from_dict``; the
        remaining keys are forwarded as keyword arguments.
        """
        editor = People.from_dict(data.pop("last_edited_by", {}))
        return cls(last_edited_by=editor, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Delegate rendering to the ``People`` object's own ``get_html``."""
        editor = self.last_edited_by
        return editor.get_html()
@@ -0,0 +1,35 @@
1
+ # https://developers.notion.com/reference/property-object#last-edited-time
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
@dataclass
class LastEditedTime(DBPropertyBase):
    """Database property definition whose ``type`` defaults to ``"last_edited_time"``."""

    id: str
    name: str
    type: str = "last_edited_time"
    description: Optional[str] = None
    # Raw ``last_edited_time`` payload, stored as-is; a fresh empty dict when not supplied.
    last_edited_time: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance by forwarding every key of ``data`` as a keyword argument."""
        return cls(**data)
21
+
22
+
23
@dataclass
class LastEditedTimeCell(DBCellBase):
    """Cell of a ``"last_edited_time"`` property; the timestamp is kept as a string."""

    id: str
    last_edited_time: str
    type: str = "last_edited_time"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance by forwarding every key of ``data`` as a keyword argument."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` whose content is the stored ``last_edited_time`` value."""
        timestamp = self.last_edited_time
        return Div([], timestamp)
@@ -0,0 +1,74 @@
1
+ # https://developers.notion.com/reference/property-object#multi-select
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag, Span
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
@dataclass
class MultiSelectOption(FromJSONMixin):
    """One selectable option: its ``color``, ``id`` and ``name``."""

    color: str
    id: str
    name: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``; its keys are passed straight through as keyword arguments."""
        return cls(**data)
24
+
25
+
26
@dataclass
class MultiSelectProp(FromJSONMixin):
    """Nested configuration of a multi-select property: the list of its options."""

    options: List[MultiSelectOption] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        """Parse each element of ``data["options"]`` (none when the key is absent)."""
        raw_options = data.get("options", [])
        parsed_options = [MultiSelectOption.from_dict(raw) for raw in raw_options]
        return cls(options=parsed_options)
33
+
34
+
35
@dataclass
class MultiSelect(DBPropertyBase):
    """Database property definition whose ``type`` defaults to ``"multi_select"``."""

    id: str
    name: str
    multi_select: MultiSelectProp
    type: str = "multi_select"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create a property from ``data``.

        The ``"multi_select"`` entry is popped from ``data`` (an empty dict is
        used when it is absent) and parsed with ``MultiSelectProp.from_dict``,
        so the ``multi_select`` field holds a ``MultiSelectProp`` as annotated
        rather than the raw dict. The remaining keys are forwarded as keyword
        arguments.
        """
        return cls(
            multi_select=MultiSelectProp.from_dict(data.pop("multi_select", {})),
            **data,
        )
49
+
50
+
51
@dataclass
class MultiSelectCell(DBCellBase):
    """Cell of a ``"multi_select"`` property, holding the selected options."""

    id: str
    multi_select: List[MultiSelectOption]
    type: str = "multi_select"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create a cell from ``data``.

        The ``"multi_select"`` entry is popped from ``data`` (the caller's dict
        is modified) and each element is parsed with
        ``MultiSelectOption.from_dict``; the remaining keys are forwarded as
        keyword arguments.
        """
        raw_options = data.pop("multi_select", [])
        selected = [MultiSelectOption.from_dict(raw) for raw in raw_options]
        return cls(multi_select=selected, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with one ``Span`` per option, or ``None`` when nothing is selected.

        A ``Span`` gets a ``color`` style only when the option's color is set
        and is not ``"default"``.
        """
        if not self.multi_select:
            return None
        spans = [
            Span(
                [Style(f"color: {opt.color}")] if opt.color and opt.color != "default" else [],
                opt.name,
            )
            for opt in self.multi_select
        ]
        return Div([], spans)
@@ -0,0 +1,50 @@
1
+ # https://developers.notion.com/reference/property-object#number
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
@dataclass
class NumberProp(FromJSONMixin):
    """Nested configuration of a number property: its ``format`` string."""

    format: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct from ``data``; its keys are passed straight through as keyword arguments."""
        return cls(**data)
21
+
22
+
23
@dataclass
class Number(DBPropertyBase):
    """Database property definition whose ``type`` defaults to ``"number"``."""

    id: str
    name: str
    number: NumberProp
    type: str = "number"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create a property from ``data``.

        The ``"number"`` entry is popped from ``data`` and parsed with
        ``NumberProp.from_dict``; a missing entry raises ``KeyError`` because
        no default is given. The remaining keys are forwarded as keyword
        arguments.
        """
        number_config = NumberProp.from_dict(data.pop("number"))
        return cls(number=number_config, **data)
34
+
35
+
36
@dataclass
class NumberCell(DBCellBase):
    """Cell of a ``"number"`` property; ``number`` is ``None`` when the cell is empty."""

    id: str
    # NOTE(review): annotated as int, but from_dict does no conversion, so
    # whatever the payload holds (e.g. a float) is stored unchanged.
    number: Optional[int] = None
    type: str = "number"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance by forwarding every key of ``data`` as a keyword argument."""
        return cls(**data)

    def get_html(self) -> Optional[HtmlTag]:
        """Render the number as text inside a ``Div``.

        Returns:
            A ``Div`` containing ``str(number)``, or ``None`` when no number is set.
        """
        # Compare against None rather than truthiness so that 0 is still rendered.
        if self.number is None:
            return None
        return Div([], str(self.number))