topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249):
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,263 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+
5
+ from .definitions import DataSourceDefinition
6
+ from shared.filtering import FilterInstance, FilterManifest
7
+
8
+
9
def _manifest(*filters: FilterInstance) -> dict:
    """Wrap the given filters in a FilterManifest and return its storage dict."""
    manifest = FilterManifest(filters=[*filters])
    return manifest.to_storage_dict()
11
+
12
+
13
# ChatGPT export ingested from an uploaded file; ingestion and enrichment are both manual.
CHATGPT_FILE = DataSourceDefinition(
    source_id="chatgpt_file_ingestion",
    display_name="ChatGPT File Ingestion",
    source_type="file",
    # Schema and parser were moved to v2 for real ChatGPT data.
    schema_id="chatgpt.conversation.v2",
    parser_id="chatgpt.conversation.v2",
    canonical_mapper_id="chatgpt",
    canonical_group_id="ai_messages",
    raw_enrichment_jobs=[
        "attachments",
        "tool_calls",
        "language",
        "time_normalization",
    ],
    canonical_enrichment_jobs=[
        "entities",
        "topics",
        "sentiment",
        "embeddings",
        "emo_27",
    ],
    analytics_profile_id="chatgpt_dev",
    # Enrichment is skipped during ingestion; trigger it via POST /v1/enrichment/process.
    enrichment_trigger="manual",
    # Ingestion processing waits for a manual trigger after upload.
    ingestion_trigger="manual",
    default_scope_id="aiMessages",
    allowed_scope_ids=[
        "aiMessages:read",
        "aiChat:read",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
    ],
    filter_tier_kind="sensitivity",
    # Each tier shortens the rolling window; medium and high also cap the row count.
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 500}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="max_rows", params={"count": 100}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "ai_chat_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "ai_chat_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
46
+
47
# ChatGPT conversations captured as a UI stream (v1 schema/parser); enrichment is automatic.
# NOTE(review): ingestion_trigger is not passed here, so whatever default
# DataSourceDefinition declares applies — confirm that is intended.
CHATGPT_UI = DataSourceDefinition(
    source_id="chatgpt_ui_conversation",
    display_name="ChatGPT UI Conversation",
    source_type="ui_stream",
    schema_id="chatgpt.conversation.v1",
    parser_id="chatgpt.conversation.v1",
    canonical_mapper_id="chatgpt",
    canonical_group_id="ai_messages",
    raw_enrichment_jobs=[
        "attachments",
        "tool_calls",
        "language",
        "time_normalization",
    ],
    canonical_enrichment_jobs=[
        "entities",
        "topics",
        "sentiment",
        "embeddings",
        "emo_27",
    ],
    analytics_profile_id="chatgpt_dev",
    # Enrichment runs automatically during ingestion.
    enrichment_trigger="automatic",
    default_scope_id="aiMessages",
    allowed_scope_ids=[
        "aiMessages:read",
        "aiChat:read",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
    ],
    filter_tier_kind="sensitivity",
    # Each tier shortens the rolling window; medium and high also cap the row count.
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 500}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="max_rows", params={"count": 100}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "ai_chat_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "ai_chat_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
79
+
80
+ # Sprint 3: Browser plugin source
81
# Page visits streamed from the browser plugin; raw-only (no canonical mapping).
BROWSER_VISITS = DataSourceDefinition(
    source_id="browser_visits",
    display_name="Browser Visits",
    source_type="ui_stream",
    schema_id="browser.visits.v1",
    parser_id="browser.visits.v1",
    # No canonical mapping for MVP.
    canonical_mapper_id=None,
    canonical_group_id=None,
    # Classify the URL category during browser ingestion.
    raw_enrichment_jobs=["url_classification"],
    canonical_enrichment_jobs=[],
    analytics_profile_id=None,
    # No automatic enrichment.
    enrichment_trigger="manual",
    ingestion_trigger="automatic",
    default_scope_id="activity",
    allowed_scope_ids=[
        "activity:read",
        "activity:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "timestamp_to_date",
        "column_blocklist",
    ],
    filter_tier_kind="inferability",
    # Tiers shorten the window, then add timestamp_to_date, then blocklist the url field.
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
            FilterInstance(filter_id="column_blocklist", params={"fields": ["url"]}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "browser_visits",
            "field": "url",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_visits",
            "field": "title",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_visits",
            "field": "visited_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
116
+
117
+ # Browser plugin events: clicks, highlights, star_page, VIDEO_PLAY
118
# Interaction events streamed from the browser plugin; raw-only, with no enrichment jobs.
BROWSER_EVENTS = DataSourceDefinition(
    source_id="browser_events",
    display_name="Browser Events",
    source_type="ui_stream",
    schema_id="browser.events.v1",
    parser_id="browser.events.v1",
    canonical_mapper_id=None,
    canonical_group_id=None,
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=[],
    analytics_profile_id=None,
    enrichment_trigger="manual",
    ingestion_trigger="automatic",
    default_scope_id="activity",
    allowed_scope_ids=[
        "activity:read",
        "activity:write",
    ],
    # NOTE(review): the "high" tier below uses max_rows, which is not listed
    # in these hints — confirm whether the omission is intentional.
    default_filter_hints=[
        "rolling_window_days",
        "timestamp_to_date",
    ],
    filter_tier_kind="inferability",
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 7}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "browser_events",
            "field": "url",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_events",
            "field": "title",
            "transform_ids": ["pii_redaction"],
        },
        {
            "table_id": "browser_events",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "browser_events",
            "field": "visited_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
154
+
155
+ # Sprint 02: Messenger ingestion (local_sync -> conversation_messages)
156
# iMessage local-sync source, mapped into the "conversations" canonical group.
IMESSAGE = DataSourceDefinition(
    source_id="imessage",
    display_name="iMessage",
    source_type="local_sync",
    schema_id="imessage.messages.v1",
    parser_id="imessage.messages.v1",
    canonical_mapper_id="imessage",
    canonical_group_id="conversations",
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=["emo_27"],
    analytics_profile_id=None,
    enrichment_trigger="automatic",
    # Sync runs on schedule or "Sync now".
    ingestion_trigger="automatic",
    default_scope_id="messages",
    allowed_scope_ids=[
        "messages:read",
        "messages:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
        "timestamp_to_date",
    ],
    filter_tier_kind="sensitivity",
    # Medium and high add a row cap and timestamp_to_date on top of a shorter window.
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 1000}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "conversation_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "conversation_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
191
+
192
# Signal Desktop local-sync source, mapped into the "conversations" canonical group.
SIGNAL = DataSourceDefinition(
    source_id="signal",
    display_name="Signal Desktop",
    source_type="local_sync",
    schema_id="signal.messages.v1",
    parser_id="signal.messages.v1",
    canonical_mapper_id="signal",
    canonical_group_id="conversations",
    raw_enrichment_jobs=[],
    canonical_enrichment_jobs=["emo_27"],
    analytics_profile_id=None,
    enrichment_trigger="automatic",
    ingestion_trigger="automatic",
    default_scope_id="messages",
    allowed_scope_ids=[
        "messages:read",
        "messages:write",
    ],
    default_filter_hints=[
        "rolling_window_days",
        "max_rows",
        "timestamp_to_date",
    ],
    filter_tier_kind="sensitivity",
    # Medium and high add a row cap and timestamp_to_date on top of a shorter window.
    default_filter_tiers=dict(
        low=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 90}),
        ),
        medium=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 30}),
            FilterInstance(filter_id="max_rows", params={"count": 1000}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
        high=_manifest(
            FilterInstance(filter_id="rolling_window_days", params={"days": 14}),
            FilterInstance(filter_id="max_rows", params={"count": 250}),
            FilterInstance(filter_id="timestamp_to_date", params={}),
        ),
    ),
    field_transform_defaults=[
        {
            "table_id": "conversation_messages",
            "field": "content",
            "transform_ids": ["pii_redaction", "nsfw_sanitization"],
        },
        {
            "table_id": "conversation_messages",
            "field": "event_at",
            "transform_ids": ["timestamp_to_date"],
        },
    ],
)
227
+
228
# Lookup table of every built-in source definition, keyed by its source_id.
# Insertion order follows the tuple below.
REGISTRY = {
    definition.source_id: definition
    for definition in (
        CHATGPT_FILE,
        CHATGPT_UI,
        BROWSER_VISITS,
        BROWSER_EVENTS,
        IMESSAGE,
        SIGNAL,
    )
}
236
+
237
+
238
def list_sources() -> list[DataSourceDefinition]:
    """Return a new list holding every definition in REGISTRY, in registry order."""
    return [*REGISTRY.values()]
240
+
241
+
242
def get_sources_by_scope(scope_id: str) -> List[str]:
    """
    Return the source_id of every registered source reachable through scope_id.

    scope_id may be the base name without :read/:write (e.g. 'messages') or a
    full MVP scope id. A source matches when either:

    * its default_scope_id equals scope_id or the base name of scope_id, or
    * one of its allowed_scope_ids equals scope_id or shares its base name.

    None or blank input returns an empty list. When scope_id has an empty base
    (e.g. ':read'), only exact matches count: an empty base must never compare
    equal to a missing or blank scope on a definition.

    Used by Topos/Control Plane for scope → source resolution.
    """
    scope_id = (scope_id or "").strip()
    if not scope_id:
        return []
    # Everything before the first ':'; empty for malformed ids such as ':read'.
    scope_base = scope_id.split(":", 1)[0]

    def _matches(defn) -> bool:
        """Tell whether a single definition is reachable through the scope."""
        default_scope = (defn.default_scope_id or "").strip()
        if default_scope == scope_id:
            return True
        # Base comparisons are skipped when the base is empty; otherwise a
        # definition with no default scope ("" after normalisation) would match.
        if scope_base and default_scope == scope_base:
            return True
        for allowed in defn.allowed_scope_ids or []:
            allowed_scope = (allowed or "").strip()
            if allowed_scope == scope_id:
                return True
            if scope_base and allowed_scope.split(":", 1)[0] == scope_base:
                return True
        return False

    return [defn.source_id for defn in REGISTRY.values() if _matches(defn)]