unstructured-ingest 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (243) hide show
  1. unstructured_ingest/__init__.py +1 -0
  2. unstructured_ingest/__version__.py +1 -0
  3. unstructured_ingest/cli/README.md +28 -0
  4. unstructured_ingest/cli/__init__.py +0 -0
  5. unstructured_ingest/cli/base/__init__.py +4 -0
  6. unstructured_ingest/cli/base/cmd.py +269 -0
  7. unstructured_ingest/cli/base/dest.py +84 -0
  8. unstructured_ingest/cli/base/importer.py +34 -0
  9. unstructured_ingest/cli/base/src.py +75 -0
  10. unstructured_ingest/cli/cli.py +24 -0
  11. unstructured_ingest/cli/cmds.py +14 -0
  12. unstructured_ingest/cli/utils/__init__.py +0 -0
  13. unstructured_ingest/cli/utils/click.py +237 -0
  14. unstructured_ingest/cli/utils/model_conversion.py +222 -0
  15. unstructured_ingest/data_types/__init__.py +0 -0
  16. unstructured_ingest/data_types/entities.py +17 -0
  17. unstructured_ingest/data_types/file_data.py +116 -0
  18. unstructured_ingest/embed/__init__.py +0 -0
  19. unstructured_ingest/embed/azure_openai.py +63 -0
  20. unstructured_ingest/embed/bedrock.py +323 -0
  21. unstructured_ingest/embed/huggingface.py +69 -0
  22. unstructured_ingest/embed/interfaces.py +146 -0
  23. unstructured_ingest/embed/mixedbreadai.py +134 -0
  24. unstructured_ingest/embed/octoai.py +133 -0
  25. unstructured_ingest/embed/openai.py +142 -0
  26. unstructured_ingest/embed/togetherai.py +116 -0
  27. unstructured_ingest/embed/vertexai.py +109 -0
  28. unstructured_ingest/embed/voyageai.py +130 -0
  29. unstructured_ingest/error.py +156 -0
  30. unstructured_ingest/errors_v2.py +156 -0
  31. unstructured_ingest/interfaces/__init__.py +27 -0
  32. unstructured_ingest/interfaces/connector.py +56 -0
  33. unstructured_ingest/interfaces/downloader.py +90 -0
  34. unstructured_ingest/interfaces/indexer.py +29 -0
  35. unstructured_ingest/interfaces/process.py +22 -0
  36. unstructured_ingest/interfaces/processor.py +88 -0
  37. unstructured_ingest/interfaces/upload_stager.py +89 -0
  38. unstructured_ingest/interfaces/uploader.py +67 -0
  39. unstructured_ingest/logger.py +39 -0
  40. unstructured_ingest/main.py +11 -0
  41. unstructured_ingest/otel.py +128 -0
  42. unstructured_ingest/pipeline/__init__.py +0 -0
  43. unstructured_ingest/pipeline/interfaces.py +211 -0
  44. unstructured_ingest/pipeline/otel.py +32 -0
  45. unstructured_ingest/pipeline/pipeline.py +408 -0
  46. unstructured_ingest/pipeline/steps/__init__.py +0 -0
  47. unstructured_ingest/pipeline/steps/chunk.py +78 -0
  48. unstructured_ingest/pipeline/steps/download.py +206 -0
  49. unstructured_ingest/pipeline/steps/embed.py +77 -0
  50. unstructured_ingest/pipeline/steps/filter.py +35 -0
  51. unstructured_ingest/pipeline/steps/index.py +86 -0
  52. unstructured_ingest/pipeline/steps/partition.py +77 -0
  53. unstructured_ingest/pipeline/steps/stage.py +65 -0
  54. unstructured_ingest/pipeline/steps/uncompress.py +50 -0
  55. unstructured_ingest/pipeline/steps/upload.py +58 -0
  56. unstructured_ingest/processes/__init__.py +18 -0
  57. unstructured_ingest/processes/chunker.py +131 -0
  58. unstructured_ingest/processes/connector_registry.py +69 -0
  59. unstructured_ingest/processes/connectors/__init__.py +129 -0
  60. unstructured_ingest/processes/connectors/airtable.py +238 -0
  61. unstructured_ingest/processes/connectors/assets/__init__.py +0 -0
  62. unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +9 -0
  63. unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json +23 -0
  64. unstructured_ingest/processes/connectors/astradb.py +592 -0
  65. unstructured_ingest/processes/connectors/azure_ai_search.py +275 -0
  66. unstructured_ingest/processes/connectors/chroma.py +193 -0
  67. unstructured_ingest/processes/connectors/confluence.py +527 -0
  68. unstructured_ingest/processes/connectors/couchbase.py +336 -0
  69. unstructured_ingest/processes/connectors/databricks/__init__.py +58 -0
  70. unstructured_ingest/processes/connectors/databricks/volumes.py +233 -0
  71. unstructured_ingest/processes/connectors/databricks/volumes_aws.py +93 -0
  72. unstructured_ingest/processes/connectors/databricks/volumes_azure.py +108 -0
  73. unstructured_ingest/processes/connectors/databricks/volumes_gcp.py +91 -0
  74. unstructured_ingest/processes/connectors/databricks/volumes_native.py +92 -0
  75. unstructured_ingest/processes/connectors/databricks/volumes_table.py +187 -0
  76. unstructured_ingest/processes/connectors/delta_table.py +310 -0
  77. unstructured_ingest/processes/connectors/discord.py +161 -0
  78. unstructured_ingest/processes/connectors/duckdb/__init__.py +15 -0
  79. unstructured_ingest/processes/connectors/duckdb/base.py +103 -0
  80. unstructured_ingest/processes/connectors/duckdb/duckdb.py +130 -0
  81. unstructured_ingest/processes/connectors/duckdb/motherduck.py +130 -0
  82. unstructured_ingest/processes/connectors/elasticsearch/__init__.py +19 -0
  83. unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +478 -0
  84. unstructured_ingest/processes/connectors/elasticsearch/opensearch.py +523 -0
  85. unstructured_ingest/processes/connectors/fsspec/__init__.py +37 -0
  86. unstructured_ingest/processes/connectors/fsspec/azure.py +203 -0
  87. unstructured_ingest/processes/connectors/fsspec/box.py +176 -0
  88. unstructured_ingest/processes/connectors/fsspec/dropbox.py +238 -0
  89. unstructured_ingest/processes/connectors/fsspec/fsspec.py +475 -0
  90. unstructured_ingest/processes/connectors/fsspec/gcs.py +203 -0
  91. unstructured_ingest/processes/connectors/fsspec/s3.py +253 -0
  92. unstructured_ingest/processes/connectors/fsspec/sftp.py +177 -0
  93. unstructured_ingest/processes/connectors/fsspec/utils.py +17 -0
  94. unstructured_ingest/processes/connectors/github.py +226 -0
  95. unstructured_ingest/processes/connectors/gitlab.py +270 -0
  96. unstructured_ingest/processes/connectors/google_drive.py +848 -0
  97. unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py +10 -0
  98. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +367 -0
  99. unstructured_ingest/processes/connectors/jira.py +522 -0
  100. unstructured_ingest/processes/connectors/kafka/__init__.py +17 -0
  101. unstructured_ingest/processes/connectors/kafka/cloud.py +121 -0
  102. unstructured_ingest/processes/connectors/kafka/kafka.py +275 -0
  103. unstructured_ingest/processes/connectors/kafka/local.py +103 -0
  104. unstructured_ingest/processes/connectors/kdbai.py +156 -0
  105. unstructured_ingest/processes/connectors/lancedb/__init__.py +30 -0
  106. unstructured_ingest/processes/connectors/lancedb/aws.py +43 -0
  107. unstructured_ingest/processes/connectors/lancedb/azure.py +43 -0
  108. unstructured_ingest/processes/connectors/lancedb/cloud.py +42 -0
  109. unstructured_ingest/processes/connectors/lancedb/gcp.py +44 -0
  110. unstructured_ingest/processes/connectors/lancedb/lancedb.py +181 -0
  111. unstructured_ingest/processes/connectors/lancedb/local.py +44 -0
  112. unstructured_ingest/processes/connectors/local.py +227 -0
  113. unstructured_ingest/processes/connectors/milvus.py +311 -0
  114. unstructured_ingest/processes/connectors/mongodb.py +389 -0
  115. unstructured_ingest/processes/connectors/neo4j.py +534 -0
  116. unstructured_ingest/processes/connectors/notion/__init__.py +0 -0
  117. unstructured_ingest/processes/connectors/notion/client.py +349 -0
  118. unstructured_ingest/processes/connectors/notion/connector.py +350 -0
  119. unstructured_ingest/processes/connectors/notion/helpers.py +448 -0
  120. unstructured_ingest/processes/connectors/notion/ingest_backoff/__init__.py +3 -0
  121. unstructured_ingest/processes/connectors/notion/ingest_backoff/_common.py +102 -0
  122. unstructured_ingest/processes/connectors/notion/ingest_backoff/_wrapper.py +126 -0
  123. unstructured_ingest/processes/connectors/notion/ingest_backoff/types.py +24 -0
  124. unstructured_ingest/processes/connectors/notion/interfaces.py +32 -0
  125. unstructured_ingest/processes/connectors/notion/types/__init__.py +0 -0
  126. unstructured_ingest/processes/connectors/notion/types/block.py +96 -0
  127. unstructured_ingest/processes/connectors/notion/types/blocks/__init__.py +63 -0
  128. unstructured_ingest/processes/connectors/notion/types/blocks/bookmark.py +40 -0
  129. unstructured_ingest/processes/connectors/notion/types/blocks/breadcrumb.py +21 -0
  130. unstructured_ingest/processes/connectors/notion/types/blocks/bulleted_list_item.py +31 -0
  131. unstructured_ingest/processes/connectors/notion/types/blocks/callout.py +131 -0
  132. unstructured_ingest/processes/connectors/notion/types/blocks/child_database.py +23 -0
  133. unstructured_ingest/processes/connectors/notion/types/blocks/child_page.py +23 -0
  134. unstructured_ingest/processes/connectors/notion/types/blocks/code.py +43 -0
  135. unstructured_ingest/processes/connectors/notion/types/blocks/column_list.py +35 -0
  136. unstructured_ingest/processes/connectors/notion/types/blocks/divider.py +22 -0
  137. unstructured_ingest/processes/connectors/notion/types/blocks/embed.py +36 -0
  138. unstructured_ingest/processes/connectors/notion/types/blocks/equation.py +23 -0
  139. unstructured_ingest/processes/connectors/notion/types/blocks/file.py +49 -0
  140. unstructured_ingest/processes/connectors/notion/types/blocks/heading.py +37 -0
  141. unstructured_ingest/processes/connectors/notion/types/blocks/image.py +21 -0
  142. unstructured_ingest/processes/connectors/notion/types/blocks/link_preview.py +24 -0
  143. unstructured_ingest/processes/connectors/notion/types/blocks/link_to_page.py +29 -0
  144. unstructured_ingest/processes/connectors/notion/types/blocks/numbered_list.py +29 -0
  145. unstructured_ingest/processes/connectors/notion/types/blocks/paragraph.py +31 -0
  146. unstructured_ingest/processes/connectors/notion/types/blocks/pdf.py +49 -0
  147. unstructured_ingest/processes/connectors/notion/types/blocks/quote.py +37 -0
  148. unstructured_ingest/processes/connectors/notion/types/blocks/synced_block.py +109 -0
  149. unstructured_ingest/processes/connectors/notion/types/blocks/table.py +60 -0
  150. unstructured_ingest/processes/connectors/notion/types/blocks/table_of_contents.py +23 -0
  151. unstructured_ingest/processes/connectors/notion/types/blocks/template.py +30 -0
  152. unstructured_ingest/processes/connectors/notion/types/blocks/todo.py +42 -0
  153. unstructured_ingest/processes/connectors/notion/types/blocks/toggle.py +37 -0
  154. unstructured_ingest/processes/connectors/notion/types/blocks/unsupported.py +20 -0
  155. unstructured_ingest/processes/connectors/notion/types/blocks/video.py +22 -0
  156. unstructured_ingest/processes/connectors/notion/types/database.py +73 -0
  157. unstructured_ingest/processes/connectors/notion/types/database_properties/__init__.py +125 -0
  158. unstructured_ingest/processes/connectors/notion/types/database_properties/checkbox.py +39 -0
  159. unstructured_ingest/processes/connectors/notion/types/database_properties/created_by.py +36 -0
  160. unstructured_ingest/processes/connectors/notion/types/database_properties/created_time.py +35 -0
  161. unstructured_ingest/processes/connectors/notion/types/database_properties/date.py +42 -0
  162. unstructured_ingest/processes/connectors/notion/types/database_properties/email.py +37 -0
  163. unstructured_ingest/processes/connectors/notion/types/database_properties/files.py +38 -0
  164. unstructured_ingest/processes/connectors/notion/types/database_properties/formula.py +50 -0
  165. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_by.py +34 -0
  166. unstructured_ingest/processes/connectors/notion/types/database_properties/last_edited_time.py +35 -0
  167. unstructured_ingest/processes/connectors/notion/types/database_properties/multiselect.py +74 -0
  168. unstructured_ingest/processes/connectors/notion/types/database_properties/number.py +50 -0
  169. unstructured_ingest/processes/connectors/notion/types/database_properties/people.py +42 -0
  170. unstructured_ingest/processes/connectors/notion/types/database_properties/phone_number.py +37 -0
  171. unstructured_ingest/processes/connectors/notion/types/database_properties/relation.py +68 -0
  172. unstructured_ingest/processes/connectors/notion/types/database_properties/rich_text.py +44 -0
  173. unstructured_ingest/processes/connectors/notion/types/database_properties/rollup.py +57 -0
  174. unstructured_ingest/processes/connectors/notion/types/database_properties/select.py +70 -0
  175. unstructured_ingest/processes/connectors/notion/types/database_properties/status.py +82 -0
  176. unstructured_ingest/processes/connectors/notion/types/database_properties/title.py +38 -0
  177. unstructured_ingest/processes/connectors/notion/types/database_properties/unique_id.py +51 -0
  178. unstructured_ingest/processes/connectors/notion/types/database_properties/url.py +38 -0
  179. unstructured_ingest/processes/connectors/notion/types/database_properties/verification.py +79 -0
  180. unstructured_ingest/processes/connectors/notion/types/date.py +29 -0
  181. unstructured_ingest/processes/connectors/notion/types/file.py +54 -0
  182. unstructured_ingest/processes/connectors/notion/types/page.py +52 -0
  183. unstructured_ingest/processes/connectors/notion/types/parent.py +66 -0
  184. unstructured_ingest/processes/connectors/notion/types/rich_text.py +189 -0
  185. unstructured_ingest/processes/connectors/notion/types/user.py +83 -0
  186. unstructured_ingest/processes/connectors/onedrive.py +485 -0
  187. unstructured_ingest/processes/connectors/outlook.py +242 -0
  188. unstructured_ingest/processes/connectors/pinecone.py +400 -0
  189. unstructured_ingest/processes/connectors/qdrant/__init__.py +16 -0
  190. unstructured_ingest/processes/connectors/qdrant/cloud.py +59 -0
  191. unstructured_ingest/processes/connectors/qdrant/local.py +58 -0
  192. unstructured_ingest/processes/connectors/qdrant/qdrant.py +163 -0
  193. unstructured_ingest/processes/connectors/qdrant/server.py +60 -0
  194. unstructured_ingest/processes/connectors/redisdb.py +214 -0
  195. unstructured_ingest/processes/connectors/salesforce.py +307 -0
  196. unstructured_ingest/processes/connectors/sharepoint.py +282 -0
  197. unstructured_ingest/processes/connectors/slack.py +249 -0
  198. unstructured_ingest/processes/connectors/sql/__init__.py +41 -0
  199. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +228 -0
  200. unstructured_ingest/processes/connectors/sql/postgres.py +168 -0
  201. unstructured_ingest/processes/connectors/sql/singlestore.py +176 -0
  202. unstructured_ingest/processes/connectors/sql/snowflake.py +298 -0
  203. unstructured_ingest/processes/connectors/sql/sql.py +456 -0
  204. unstructured_ingest/processes/connectors/sql/sqlite.py +179 -0
  205. unstructured_ingest/processes/connectors/sql/teradata.py +254 -0
  206. unstructured_ingest/processes/connectors/sql/vastdb.py +263 -0
  207. unstructured_ingest/processes/connectors/utils.py +60 -0
  208. unstructured_ingest/processes/connectors/vectara.py +348 -0
  209. unstructured_ingest/processes/connectors/weaviate/__init__.py +22 -0
  210. unstructured_ingest/processes/connectors/weaviate/cloud.py +166 -0
  211. unstructured_ingest/processes/connectors/weaviate/embedded.py +90 -0
  212. unstructured_ingest/processes/connectors/weaviate/local.py +73 -0
  213. unstructured_ingest/processes/connectors/weaviate/weaviate.py +337 -0
  214. unstructured_ingest/processes/connectors/zendesk/__init__.py +0 -0
  215. unstructured_ingest/processes/connectors/zendesk/client.py +314 -0
  216. unstructured_ingest/processes/connectors/zendesk/zendesk.py +241 -0
  217. unstructured_ingest/processes/embedder.py +203 -0
  218. unstructured_ingest/processes/filter.py +60 -0
  219. unstructured_ingest/processes/partitioner.py +233 -0
  220. unstructured_ingest/processes/uncompress.py +61 -0
  221. unstructured_ingest/processes/utils/__init__.py +8 -0
  222. unstructured_ingest/processes/utils/blob_storage.py +32 -0
  223. unstructured_ingest/processes/utils/logging/connector.py +365 -0
  224. unstructured_ingest/processes/utils/logging/sanitizer.py +117 -0
  225. unstructured_ingest/unstructured_api.py +140 -0
  226. unstructured_ingest/utils/__init__.py +5 -0
  227. unstructured_ingest/utils/chunking.py +56 -0
  228. unstructured_ingest/utils/compression.py +72 -0
  229. unstructured_ingest/utils/constants.py +2 -0
  230. unstructured_ingest/utils/data_prep.py +216 -0
  231. unstructured_ingest/utils/dep_check.py +78 -0
  232. unstructured_ingest/utils/filesystem.py +27 -0
  233. unstructured_ingest/utils/html.py +174 -0
  234. unstructured_ingest/utils/ndjson.py +52 -0
  235. unstructured_ingest/utils/pydantic_models.py +52 -0
  236. unstructured_ingest/utils/string_and_date_utils.py +74 -0
  237. unstructured_ingest/utils/table.py +80 -0
  238. unstructured_ingest/utils/tls.py +15 -0
  239. unstructured_ingest-1.2.32.dist-info/METADATA +235 -0
  240. unstructured_ingest-1.2.32.dist-info/RECORD +243 -0
  241. unstructured_ingest-1.2.32.dist-info/WHEEL +4 -0
  242. unstructured_ingest-1.2.32.dist-info/entry_points.txt +2 -0
  243. unstructured_ingest-1.2.32.dist-info/licenses/LICENSE.md +201 -0
@@ -0,0 +1,42 @@
1
+ # https://developers.notion.com/reference/property-object#people
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.user import People as PeopleType
9
+
10
+
11
@dataclass
class People(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"people"``.

    The ``description`` field was previously declared twice in the class body;
    the redundant second declaration has been dropped. The dataclass field
    order (``id``, ``name``, ``description``, ``type``, ``people``) is the same
    as before, so positional construction is unaffected.
    """

    id: str
    name: str
    description: Optional[str] = None
    type: str = "people"
    people: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Build a ``People`` property from a mapping of field names to values.

        Every key in ``data`` is forwarded to the constructor as a keyword
        argument, so a key that is not a declared field raises ``TypeError``.
        """
        return cls(**data)
23
+
24
+
25
@dataclass
class PeopleCell(DBCellBase):
    """Cell value of a ``people`` column: a list of parsed user objects."""

    id: str
    people: List[PeopleType]
    type: str = "people"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``, parsing each ``people`` entry.

        The ``people`` key is removed from ``data`` (the caller's dict is
        mutated); the remaining keys are forwarded to the constructor.
        """
        raw_people = data.pop("people", {})
        parsed_people = [PeopleType.from_dict(entry) for entry in raw_people]
        return cls(people=parsed_people, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with one ``Span`` per person, or ``None`` if empty."""
        if not self.people:
            return None
        return Div([], [Span([], person.get_html()) for person in self.people])
@@ -0,0 +1,37 @@
1
+ # https://developers.notion.com/reference/property-object#phone-number
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+
9
+
10
@dataclass
class PhoneNumber(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"phone_number"``."""

    id: str
    name: str
    type: str = "phone_number"
    description: Optional[str] = None
    phone_number: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the property by forwarding every key of ``data`` as a keyword argument."""
        prop = cls(**data)
        return prop
21
+
22
+
23
@dataclass
class PhoneNumberCell(DBCellBase):
    """Cell value of a ``phone_number`` column; the value may be ``None``."""

    id: str
    phone_number: Optional[str]
    name: Optional[str] = None
    type: str = "phone_number"

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the cell by forwarding every key of ``data`` as a keyword argument."""
        cell = cls(**data)
        return cell

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` wrapping the phone number, or ``None`` when it is empty or unset."""
        number = self.phone_number
        if not number:
            return None
        return Div([], number)
@@ -0,0 +1,68 @@
1
+ # https://developers.notion.com/reference/property-object#relation
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+ from urllib.parse import unquote
5
+
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
@dataclass
class DualProperty(FromJSONMixin):
    """Payload stored under the ``dual_property`` key of a relation property."""

    synced_property_id: str
    synced_property_name: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the object by forwarding every key of ``data`` as a keyword argument."""
        dual = cls(**data)
        return dual
23
+
24
+
25
@dataclass
class RelationProp(FromJSONMixin):
    """Configuration of a relation property (target database and relation kind)."""

    database_id: str
    type: str
    dual_property: DualProperty

    @classmethod
    def from_dict(cls, data: dict):
        """Parse a relation configuration from ``data``.

        Raises:
            ValueError: if ``data["type"]`` is anything other than
                ``"dual_property"`` (including when the key is absent).
        """
        # NOTE(review): only the "dual_property" kind is accepted here; any
        # other relation type is rejected. Confirm this covers all payloads
        # the API can return.
        relation_type = data.get("type")
        if relation_type != "dual_property":
            raise ValueError(f"{relation_type} type not recognized")
        # The nested payload lives under a key named after the type; it is
        # removed from ``data`` so the remaining keys map onto the fields.
        parsed_dual = DualProperty.from_dict(data.pop(relation_type))
        return cls(dual_property=parsed_dual, **data)
40
+
41
+
42
@dataclass
class Relation(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"relation"``."""

    id: str
    name: str
    relation: RelationProp
    type: str = "relation"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property, parsing the required ``relation`` entry.

        ``data["relation"]`` is removed from ``data`` (``KeyError`` if absent)
        and parsed with ``RelationProp.from_dict``; remaining keys are
        forwarded to the constructor.
        """
        relation_config = RelationProp.from_dict(data.pop("relation"))
        return cls(relation=relation_config, **data)
53
+
54
+
55
@dataclass
class RelationCell(DBCellBase):
    """Cell value of a ``relation`` column."""

    id: str
    has_more: bool
    relation: list
    type: str = "relation"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the cell by forwarding every key of ``data`` as a keyword argument."""
        cell = cls(**data)
        return cell

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` containing the percent-decoded ``id`` of the cell."""
        # NOTE(review): only ``id`` is rendered; the ``relation`` list is not
        # used here. Confirm that is the intended output.
        decoded_id = unquote(self.id)
        return Div([], decoded_id)
@@ -0,0 +1,44 @@
1
+ # https://developers.notion.com/reference/property-object#rich-text
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.rich_text import (
9
+ RichText as RichTextType,
10
+ )
11
+
12
+
13
@dataclass
class RichText(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"rich_text"``."""

    id: str
    name: str
    type: str = "rich_text"
    description: Optional[str] = None
    rich_text: dict = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the property by forwarding every key of ``data`` as a keyword argument."""
        prop = cls(**data)
        return prop
24
+
25
+
26
@dataclass
class RichTextCell(DBCellBase):
    """Cell value of a ``rich_text`` column: a list of parsed rich-text fragments."""

    id: str
    rich_text: List[RichTextType]
    name: Optional[str] = None
    type: str = "rich_text"

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``, parsing each ``rich_text`` entry.

        The ``rich_text`` key is removed from ``data`` (defaulting to an empty
        list); the remaining keys are forwarded to the constructor.
        """
        raw_fragments = data.pop("rich_text", [])
        fragments = [RichTextType.from_dict(fragment) for fragment in raw_fragments]
        return cls(rich_text=fragments, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with one ``Span`` per fragment, or ``None`` if there are none."""
        if self.rich_text:
            return Div([], [Span([], fragment.get_html()) for fragment in self.rich_text])
        return None
@@ -0,0 +1,57 @@
1
+ # https://developers.notion.com/reference/property-object#rollup
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
@dataclass
class RollupProp(FromJSONMixin):
    """Configuration of a rollup property: the function and the properties it reads."""

    function: str
    relation_property_id: str
    relation_property_name: str
    rollup_property_id: str
    rollup_property_name: str

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the object by forwarding every key of ``data`` as a keyword argument."""
        rollup_config = cls(**data)
        return rollup_config
25
+
26
+
27
@dataclass
class Rollup(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"rollup"``."""

    id: str
    name: str
    rollup: RollupProp
    type: str = "rollup"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property, parsing the required ``rollup`` entry.

        ``data["rollup"]`` is removed from ``data`` (``KeyError`` if absent)
        and parsed with ``RollupProp.from_dict``; remaining keys are forwarded
        to the constructor.
        """
        rollup_config = RollupProp.from_dict(data.pop("rollup"))
        return cls(rollup=rollup_config, **data)
38
+
39
+
40
@dataclass
class RollupCell(DBCellBase):
    """Cell value of a ``rollup`` column, kept as the raw dictionary."""

    id: str
    rollup: dict
    type: str = "rollup"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the cell by forwarding every key of ``data`` as a keyword argument."""
        cell = cls(**data)
        return cell

    def get_html(self) -> Optional[HtmlTag]:
        """Render the rollup value as HTML.

        The value is looked up under the key named by ``rollup["type"]``
        (``KeyError`` if that key is missing). A list value becomes a ``Div``
        of ``Span`` elements, one per item; anything else becomes a ``Div``
        holding ``str(value)``.
        """
        value_key = self.rollup.get("type")
        value = self.rollup[value_key]
        if not isinstance(value, list):
            return Div([], str(value))
        return Div([], [Span([], str(item)) for item in value])
@@ -0,0 +1,70 @@
1
+ # https://developers.notion.com/reference/property-object#select
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
@dataclass
class SelectOption(FromJSONMixin):
    """A single choice of a select property (color, id, name, optional description)."""

    color: str
    id: str
    name: str
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the option by forwarding every key of ``data`` as a keyword argument."""
        option = cls(**data)
        return option
25
+
26
+
27
@dataclass
class SelectProp(FromJSONMixin):
    """Configuration of a select property: the list of available options."""

    options: List[SelectOption] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        """Parse ``data["options"]`` (empty list if absent) into ``SelectOption`` objects.

        Keys of ``data`` other than ``options`` are ignored.
        """
        raw_options = data.get("options", [])
        parsed_options = [SelectOption.from_dict(raw) for raw in raw_options]
        return cls(options=parsed_options)
34
+
35
+
36
@dataclass
class Select(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"select"``."""

    id: str
    name: str
    select: SelectProp
    type: str = "select"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property, parsing the ``select`` entry.

        ``data["select"]`` is removed from ``data`` (an empty dict is used if
        it is absent) and parsed with ``SelectProp.from_dict``; remaining keys
        are forwarded to the constructor.
        """
        select_config = SelectProp.from_dict(data.pop("select", {}))
        return cls(select=select_config, **data)
47
+
48
+
49
@dataclass
class SelectCell(DBCellBase):
    """Cell value of a ``select`` column: the chosen option, or ``None``."""

    id: str
    select: Optional[SelectOption]
    type: str = "select"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``.

        ``data["select"]`` is removed from ``data`` (``KeyError`` if absent).
        A falsy value yields ``select=None``; otherwise it is parsed with
        ``SelectOption.from_dict``.
        """
        raw_option = data.pop("select")
        option = SelectOption.from_dict(raw_option) if raw_option else None
        return cls(select=option, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with the option name, or ``None`` when nothing is selected.

        A ``color`` style attribute is attached unless the option's color is
        empty or ``"default"``.
        """
        option = self.select
        if not option:
            return None
        attributes = []
        if option.color and option.color != "default":
            attributes.append(Style(f"color: {option.color}"))
        return Div(attributes, option.name)
@@ -0,0 +1,82 @@
1
+ # https://developers.notion.com/reference/property-object#status
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.attributes import Style
6
+ from htmlBuilder.tags import Div, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ DBCellBase,
10
+ DBPropertyBase,
11
+ FromJSONMixin,
12
+ )
13
+
14
+
15
@dataclass
class StatusOption(FromJSONMixin):
    """A single choice of a status property (color, id, name, optional description)."""

    color: str
    id: str
    name: str
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the option by forwarding every key of ``data`` as a keyword argument."""
        option = cls(**data)
        return option
25
+
26
+
27
@dataclass
class StatusGroup(FromJSONMixin):
    """A group of status options (color, id, name and the ids of its options)."""

    color: str
    id: str
    name: str
    # ``list`` is the factory: a ``typing.List[str]`` alias cannot be
    # instantiated, so using it as ``default_factory`` raised ``TypeError``
    # whenever ``option_ids`` was omitted.
    option_ids: List[str] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the group by forwarding every key of ``data`` as a keyword argument.

        ``option_ids`` defaults to an empty list when ``data`` does not
        provide it.
        """
        return cls(**data)
37
+
38
+
39
@dataclass
class StatusProp(FromJSONMixin):
    """Configuration of a status property: its options and option groups."""

    options: List[StatusOption] = field(default_factory=list)
    groups: List[StatusGroup] = field(default_factory=list)

    @classmethod
    def from_dict(cls, data: dict):
        """Parse the ``options`` and ``groups`` lists of ``data``.

        Either key may be absent, in which case the corresponding list is
        empty. Other keys of ``data`` are ignored.
        """
        parsed_options = [StatusOption.from_dict(raw) for raw in data.get("options", [])]
        parsed_groups = [StatusGroup.from_dict(raw) for raw in data.get("groups", [])]
        return cls(options=parsed_options, groups=parsed_groups)
50
+
51
+
52
@dataclass
class Status(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"status"``."""

    id: str
    name: str
    status: StatusProp
    type: str = "status"
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build the property, parsing the ``status`` entry.

        ``data["status"]`` is removed from ``data`` (an empty dict is used if
        it is absent) and parsed with ``StatusProp.from_dict``; remaining keys
        are forwarded to the constructor.
        """
        status_config = StatusProp.from_dict(data.pop("status", {}))
        return cls(status=status_config, **data)
63
+
64
+
65
@dataclass
class StatusCell(DBCellBase):
    """Cell value of a ``status`` column: the chosen option, or ``None``."""

    id: str
    status: Optional[StatusOption]
    type: str = "status"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``.

        ``data["status"]`` is removed from ``data``. A missing, ``None`` or
        empty value yields ``status=None`` (matching the ``Optional`` field
        and the ``None`` handling in ``get_html``) instead of failing inside
        ``StatusOption.from_dict``; any other value is parsed with
        ``StatusOption.from_dict``. Remaining keys are forwarded to the
        constructor.
        """
        status_data = data.pop("status", None)
        status = StatusOption.from_dict(status_data) if status_data else None
        return cls(status=status, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with the status name, or ``None`` when no status is set.

        A ``color`` style attribute is attached unless the status color is
        empty or ``"default"``.
        """
        status = self.status
        if not status:
            return None
        status_attr = []
        if status.color and status.color != "default":
            status_attr.append(Style(f"color: {status.color}"))
        return Div(status_attr, status.name)
@@ -0,0 +1,38 @@
1
+ # https://developers.notion.com/reference/property-object#title
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
8
+ from unstructured_ingest.processes.connectors.notion.types.rich_text import RichText
9
+
10
+
11
@dataclass
class Title(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"title"``."""

    id: str
    name: str
    type: str = "title"
    title: dict = field(default_factory=dict)
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the property by forwarding every key of ``data`` as a keyword argument."""
        prop = cls(**data)
        return prop
22
+
23
+
24
@dataclass
class TitleCell(DBCellBase):
    """Cell value of a ``title`` column: a list of parsed rich-text fragments."""

    id: str
    title: List[RichText]
    type: str = "title"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``, parsing each ``title`` entry.

        The ``title`` key is removed from ``data`` (defaulting to an empty
        list); the remaining keys are forwarded to the constructor.
        """
        raw_fragments = data.pop("title", [])
        fragments = [RichText.from_dict(fragment) for fragment in raw_fragments]
        return cls(title=fragments, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` of the fragments' HTML, or ``None`` if the title is empty."""
        if self.title:
            return Div([], [fragment.get_html() for fragment in self.title])
        return None
@@ -0,0 +1,51 @@
1
+ # https://developers.notion.com/reference/page-property-values#unique-id
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ )
12
+
13
+
14
@dataclass
class UniqueID(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"unique_id"``."""

    id: str
    name: str
    type: str = "unique_id"
    unique_id: dict = field(default_factory=dict)
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the property by forwarding every key of ``data`` as a keyword argument."""
        prop = cls(**data)
        return prop
25
+
26
+
27
@dataclass
class UniqueIDCellData(FromJSONMixin):
    """Value of a unique-id cell: a prefix and a number."""

    prefix: str
    number: int

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the object by forwarding every key of ``data`` as a keyword argument."""
        cell_data = cls(**data)
        return cell_data
35
+
36
+
37
@dataclass
class UniqueIDCell(DBCellBase):
    """Cell value of a ``unique_id`` column: prefix/number data, or ``None``."""

    id: str
    unique_id: Optional[UniqueIDCellData]
    # The default was previously "title" (copied from the title cell); it now
    # matches the property type this class represents.
    type: str = "unique_id"
    name: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from ``data``.

        ``data["unique_id"]`` is removed from ``data``. A missing, ``None`` or
        empty value yields ``unique_id=None`` (matching the ``Optional``
        field) instead of failing inside ``UniqueIDCellData.from_dict``; any
        other value is parsed with ``UniqueIDCellData.from_dict``. Remaining
        keys are forwarded to the constructor.
        """
        unique_id_data = data.pop("unique_id", None)
        unique_id = UniqueIDCellData.from_dict(unique_id_data) if unique_id_data else None
        return cls(unique_id=unique_id, **data)

    def get_html(self) -> Optional[HtmlTag]:
        """Return a ``Div`` with ``"<prefix>-<number>"``, or ``None`` when unset.

        When the prefix is empty or ``None`` only the number is rendered, so
        the output never contains a literal ``"None-"`` or a leading dash.
        """
        unique_id = self.unique_id
        if not unique_id:
            return None
        if unique_id.prefix:
            return Div([], f"{unique_id.prefix}-{unique_id.number}")
        return Div([], f"{unique_id.number}")
@@ -0,0 +1,38 @@
1
+ # https://developers.notion.com/reference/property-object#url
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.attributes import Href
6
+ from htmlBuilder.tags import A, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import DBCellBase, DBPropertyBase
9
+
10
+
11
@dataclass
class URL(DBPropertyBase):
    """Database property definition for a column whose ``type`` is ``"url"``."""

    id: str
    name: str
    type: str = "url"
    url: dict = field(default_factory=dict)
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the property by forwarding every key of ``data`` as a keyword argument."""
        prop = cls(**data)
        return prop
22
+
23
+
24
@dataclass
class URLCell(DBCellBase):
    """Cell value of a ``url`` column; the URL may be ``None``."""

    id: str
    url: Optional[str] = None
    name: Optional[str] = None
    type: str = "url"

    @classmethod
    def from_dict(cls, data: dict):
        """Construct the cell by forwarding every key of ``data`` as a keyword argument."""
        cell = cls(**data)
        return cell

    def get_html(self) -> Optional[HtmlTag]:
        """Return an ``A`` tag linking to (and labelled with) the URL, or ``None`` if unset."""
        link = self.url
        if not link:
            return None
        return A([Href(link)], link)
@@ -0,0 +1,79 @@
1
+ # https://developers.notion.com/reference/page-property-values#verification
2
+ from dataclasses import dataclass, field
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag, Span
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ DBCellBase,
9
+ DBPropertyBase,
10
+ FromJSONMixin,
11
+ GetHTMLMixin,
12
+ )
13
+ from unstructured_ingest.processes.connectors.notion.types.date import Date
14
+ from unstructured_ingest.processes.connectors.notion.types.user import People
15
+
16
+
17
@dataclass
class Verification(DBPropertyBase):
    """Database property definition whose ``type`` defaults to ``"verification"``.

    Instances are normally created through :meth:`from_dict`, so the keys of
    the incoming dict must match the field names declared here.
    """

    id: str
    name: str
    type: str = "verification"
    # Value stored under the ``verification`` key; kept as an untyped dict.
    verification: dict = field(default_factory=dict)
    description: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance by forwarding every item of ``data`` as a keyword argument."""
        instance = cls(**data)
        return instance
28
+
29
+
30
@dataclass
class VerificationData(FromJSONMixin, GetHTMLMixin):
    """Verification details: a state string, the verifying user and a date.

    Each of the three parts may be ``None``.
    """

    state: Optional[str]
    verified_by: Optional[People]
    date: Optional[Date]

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance from its dict representation.

        ``verified_by`` and ``date`` are parsed into ``People`` / ``Date``
        objects when present and non-empty; otherwise they become ``None``.
        All remaining items are forwarded as keyword arguments.

        Args:
            data: Mapping with the field names of this class as keys.

        Returns:
            A new ``VerificationData``.
        """
        # Pop from a shallow copy so the caller's dict is left untouched.
        payload = dict(data)
        raw_verified_by = payload.pop("verified_by", None)
        raw_date = payload.pop("date", None)
        return cls(
            verified_by=People.from_dict(data=raw_verified_by) if raw_verified_by else None,
            date=Date.from_dict(data=raw_date) if raw_date else None,
            **payload,
        )

    def get_html(self) -> Optional[HtmlTag]:
        """Render state, verifier and date (in that order) inside a ``Div``.

        Parts that are unset, or whose own ``get_html`` yields nothing, are
        skipped. Returns ``None`` when nothing is left to render.
        """
        elements = []
        if state := self.state:
            elements.append(Span([], state))
        if (verified_by := self.verified_by) and (verified_by_html := verified_by.get_html()):
            elements.append(verified_by_html)
        if (date := self.date) and (date_html := date.get_html()):
            elements.append(date_html)
        return Div([], elements) if elements else None
57
+
58
+
59
@dataclass
class VerificationCell(DBCellBase):
    """Database cell holding optional verification details.

    ``verification`` may be ``None``; in that case only the name (if any) is
    rendered.
    """

    id: str
    verification: Optional[VerificationData]
    name: Optional[str] = None
    type: str = "verification"

    @classmethod
    def from_dict(cls, data: dict):
        """Build a cell from its dict representation.

        Args:
            data: Mapping with a mandatory ``verification`` key (its value may
                be ``None``) plus the remaining field names of this class.

        Returns:
            A new ``VerificationCell``.

        Raises:
            KeyError: If ``data`` has no ``verification`` key.
        """
        # Pop from a shallow copy so the caller's mapping keeps its "verification" key.
        payload = dict(data)
        raw_verification = payload.pop("verification")
        # The field is Optional: only parse the nested payload when one is present.
        verification = (
            VerificationData.from_dict(raw_verification) if raw_verification is not None else None
        )
        return cls(verification=verification, **payload)

    def get_html(self) -> Optional[HtmlTag]:
        """Render the name followed by the verification details inside a ``Div``.

        Returns ``None`` when neither part produces any content.
        """
        elements = []
        if name := self.name:
            elements.append(Span([], name))
        if (verification := self.verification) and (verification_html := verification.get_html()):
            elements.append(verification_html)
        return Div([], elements) if elements else None
@@ -0,0 +1,29 @@
1
+ # https://developers.notion.com/reference/property-value-object#date-property-values
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.tags import Div, HtmlTag
6
+
7
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
8
+ FromJSONMixin,
9
+ GetHTMLMixin,
10
+ )
11
+
12
+
13
@dataclass
class Date(FromJSONMixin, GetHTMLMixin):
    """A date value with a mandatory start and optional end and time zone."""

    start: str
    end: Optional[str] = None
    time_zone: Optional[str] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance by forwarding every item of ``data`` as a keyword argument."""
        parsed = cls(**data)
        return parsed

    def get_html(self) -> Optional[HtmlTag]:
        """Render ``"<start>[ - <end>][ <time_zone>]"`` inside a ``Div``."""
        pieces = [f"{self.start}"]
        if self.end:
            pieces.append(f" - {self.end}")
        if self.time_zone:
            pieces.append(f" {self.time_zone}")
        return Div([], "".join(pieces))
@@ -0,0 +1,54 @@
1
+ # https://developers.notion.com/reference/file-object
2
+ from dataclasses import dataclass
3
+ from typing import Optional
4
+
5
+ from htmlBuilder.attributes import Href
6
+ from htmlBuilder.tags import A, HtmlTag
7
+
8
+ from unstructured_ingest.processes.connectors.notion.interfaces import (
9
+ FromJSONMixin,
10
+ GetHTMLMixin,
11
+ )
12
+
13
+
14
@dataclass
class External(FromJSONMixin):
    """The ``external`` variant of a file object: just a URL."""

    url: str

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance by forwarding every item of ``data`` as a keyword argument."""
        parsed = cls(**data)
        return parsed
21
+
22
+
23
@dataclass
class File(FromJSONMixin):
    """The ``file`` variant of a file object: a URL plus its expiry time."""

    url: str
    expiry_time: str

    @classmethod
    def from_dict(cls, data: dict):
        """Create an instance by forwarding every item of ``data`` as a keyword argument."""
        parsed = cls(**data)
        return parsed
31
+
32
+
33
@dataclass
class FileObject(FromJSONMixin, GetHTMLMixin):
    """A file reference tagged by ``type``, holding an ``external`` or ``file`` payload.

    At most one of ``external`` / ``file`` is populated by :meth:`from_dict`;
    for any other ``type`` value both stay ``None``.
    """

    type: str
    external: Optional[External] = None
    file: Optional[File] = None

    @classmethod
    def from_dict(cls, data: dict):
        """Build an instance, parsing only the payload that matches ``data["type"]``.

        Raises:
            KeyError: If ``type`` is missing, or the payload key named by it is missing.
        """
        kind = data["type"]
        external = External.from_dict(data["external"]) if kind == "external" else None
        file = File.from_dict(data["file"]) if kind == "file" else None
        return cls(type=kind, external=external, file=file)

    def get_html(self) -> Optional[HtmlTag]:
        """Return an anchor to the file URL, preferring ``file`` over ``external``.

        Returns ``None`` when neither payload is set.
        """
        source = self.file or self.external
        if not source:
            return None
        return A([Href(source.url)], source.url)