topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
topos/api/uma_data.py ADDED
@@ -0,0 +1,311 @@
1
+ # Topos UMA scoped data endpoints (US-3.8) and filter enforcement (US-3.6)
2
+ # Sprint 05: scope resolution — return data only for tables allowed by RPT scopes.
3
+ # See: control_plane/uma/uma_sprints/SPRINT_3_USER_SHARING.md, sprints_roles_scopes_stage_1/SPRINT_05_TOPOS_SCOPE_RESOLUTION.md
4
+
5
+ from __future__ import annotations
6
+
7
+ import hashlib
8
+
9
+ from typing import Any, Dict, List, Optional, Set, Tuple
10
+
11
+ from fastapi import APIRouter, HTTPException, Query, Request, status
12
+
13
+ from ..core.state import get_db_connection
14
+ from ..uma_rpt import RPTValidationError, get_control_plane_http_base, introspect_for_resource
15
+ from ..uma_filters import UMAFilterError, apply_filter_manifest, apply_filters, extract_field_transforms, extract_filter_manifest, get_limit_cap
16
+ from ..scope_resolution import resolve_scopes_to_tables, may_access_table
17
+ from ..uma_contact_enrichment import apply_message_contact_pipeline, strip_contact_runtime_filters
18
+ from ..uma_resource_id import parse_dataset_id_from_uma_dataset_resource_id
19
+ from ..engine.usage_observation import emit_usage_observation
20
+
21
+ router = APIRouter(prefix="/v1/uma/resources", tags=["uma-data"])
22
+
23
+
24
def _table_exists(conn, table_name: str) -> bool:
    """Report whether ``table_name`` is listed as a table in ``sqlite_master``.

    The lookup is best-effort: any exception raised while running the query
    is treated the same as "table not present" and yields ``False``.
    """
    lookup_sql = "SELECT name FROM sqlite_master WHERE type='table' AND name=?"
    try:
        match = conn.execute(lookup_sql, (table_name,)).fetchone()
    except Exception:
        return False
    return match is not None
33
+
34
+
35
def _sqlite_table_columns(conn, table: str) -> Set[str]:
    """Return the column names reported by ``PRAGMA table_info`` for ``table``.

    Double quotes are removed from ``table`` before it is placed inside the
    quoted PRAGMA argument. Any exception (including an unusable connection)
    results in an empty set.
    """
    try:
        unquoted = table.replace('"', "")
        pragma_rows = conn.execute(f'PRAGMA table_info("{unquoted}")').fetchall()
        # Index 1 of each table_info row is the column name.
        return {str(info[1]) for info in pragma_rows}
    except Exception:
        return set()
41
+
42
+
43
def _message_time_order_column(conn, table: str) -> str:
    """Choose the column used to order rows of ``table`` by time.

    Preference order is ``event_at``, then ``ts`` (Stage 6 seed uses ts-only);
    when neither column is present the function falls back to ``message_id``.
    """
    available = _sqlite_table_columns(conn, table)
    for preferred in ("event_at", "ts"):
        if preferred in available:
            return preferred
    return "message_id"
51
+
52
+
53
def _message_sort_key(row: Dict[str, Any]) -> Tuple[Tuple[int, Any], Tuple[int, Any]]:
    """Return a sort key for a merged message row that never mixes incomparable types.

    The key is ``(timestamp, message_id)`` where the timestamp is ``event_at``
    when truthy, otherwise ``ts``. Each component is wrapped as
    ``(type_rank, value)`` so that missing values, numbers and strings sort in
    separate groups instead of raising ``TypeError`` when compared. Rows whose
    components share one type keep the same relative order as a plain
    ``(value, message_id)`` key would give.
    """

    def _rank(value: Any) -> Tuple[int, Any]:
        if not value:
            return (0, "")
        if isinstance(value, (int, float)):
            return (1, value)
        return (2, value if isinstance(value, str) else str(value))

    return _rank(row.get("event_at") or row.get("ts")), _rank(row.get("message_id"))


def _get_messages_from_db(
    conn,
    dataset_id: Optional[str],
    limit: int,
    offset: int,
    allowed_tables: Optional[Set[str]] = None,
) -> List[Dict[str, Any]]:
    """Query only the message-like tables permitted by the granted scopes.

    Args:
        conn: Database connection exposing ``execute``.
        dataset_id: Accepted for interface compatibility; the queries issued
            here do not filter on it.
        limit: Maximum number of rows to return.
        offset: Number of rows to skip in the (combined) ordering.
        allowed_tables: Logical table names granted to the caller. When empty
            or ``None``, every message table that exists is considered.

    Returns:
        Rows as ``column -> value`` dicts, newest first. With a single source
        table pagination happens in SQL; with several, the top
        ``limit + offset`` rows of each table are merged and sliced in Python.
    """

    def _fetch_rows(query: str, params: Tuple[int, int]) -> List[Dict[str, Any]]:
        cursor = conn.execute(query, params)
        columns = [d[0] for d in cursor.description]
        return [dict(zip(columns, row)) for row in cursor.fetchall()]

    def _allowed_message_sources() -> List[str]:
        candidates: List[str] = []
        if not allowed_tables:
            # No explicit grant set: use whichever message tables exist.
            if _table_exists(conn, "conversation_messages"):
                candidates.append("conversation_messages")
            elif _table_exists(conn, "messages"):
                candidates.append("messages")
            if _table_exists(conn, "ai_chat_messages"):
                candidates.append("ai_chat_messages")
            return candidates

        if "messages" in allowed_tables:
            # "messages" is served from conversation_messages when that table exists.
            if _table_exists(conn, "conversation_messages"):
                candidates.append("conversation_messages")
            elif _table_exists(conn, "messages"):
                candidates.append("messages")
        if "conversation_messages" in allowed_tables and _table_exists(conn, "conversation_messages"):
            if "conversation_messages" not in candidates:
                candidates.append("conversation_messages")
        if (
            {"ai_chat", "ai_messages", "ai_chat_messages"} & set(allowed_tables)
            and _table_exists(conn, "ai_chat_messages")
        ):
            candidates.append("ai_chat_messages")
        return candidates

    def _select_page(table: str, page_limit: int, page_offset: int) -> List[Dict[str, Any]]:
        # ``table`` only ever comes from the fixed names below and the order
        # column from a fixed set of literals, so interpolation is safe here.
        # The legacy ``messages`` table is always ordered by ``ts``.
        order_col = "ts" if table == "messages" else _message_time_order_column(conn, table)
        return _fetch_rows(
            f"SELECT * FROM {table} ORDER BY {order_col} DESC LIMIT ? OFFSET ?",
            (page_limit, page_offset),
        )

    sources = _allowed_message_sources()
    if not sources:
        return []

    # Keep single-table queries paginated in SQL; when multiple tables are allowed,
    # fetch and merge in Python so limit/offset apply to the combined result set.
    if len(sources) == 1:
        return _select_page(sources[0], limit, offset)

    merged: List[Dict[str, Any]] = []
    for table in ("conversation_messages", "messages", "ai_chat_messages"):
        if table in sources:
            merged.extend(_select_page(table, limit + offset, 0))
    # Tables may store timestamps as different types (or not at all); the
    # ranked key keeps the sort from failing on such mixed rows.
    merged.sort(key=_message_sort_key, reverse=True)
    return merged[offset:offset + limit]
144
+
145
+
146
def _get_oplog_from_db(
    conn,
    dataset_id: Optional[str],
    limit: int,
    offset: int,
) -> List[Dict[str, Any]]:
    """Return one page of ``oplog`` rows ordered by ``hlc_ts``.

    An empty list is returned when the table does not exist or when the query
    raises. ``dataset_id`` is accepted but not used by the query.
    """
    if _table_exists(conn, "oplog"):
        try:
            result = conn.execute(
                "SELECT * FROM oplog ORDER BY hlc_ts LIMIT ? OFFSET ?",
                (limit, offset),
            )
            names = [col[0] for col in result.description]
            return [dict(zip(names, values)) for values in result.fetchall()]
        except Exception:
            return []
    return []
162
+
163
+
164
def _extract_bearer(request: Request) -> Optional[str]:
    """Return the bearer token from the ``authorization`` header, if any.

    The auth scheme is matched case-insensitively (HTTP auth schemes are
    case-insensitive), so ``Bearer``, ``bearer`` and ``BEARER`` are all
    accepted. The scheme must be followed by a space; surrounding whitespace
    around the token is stripped.

    Returns:
        The token string, or ``None`` when the header is missing, uses another
        scheme, or carries an empty token.
    """
    header = request.headers.get("authorization")
    if not header:
        return None
    scheme, _, credentials = header.partition(" ")
    if scheme.lower() != "bearer":
        return None
    return credentials.strip() or None
169
+
170
+
171
async def require_uma_rpt(request: Request, resource_id: str) -> Dict[str, Any]:
    """
    Validate the request's RPT for ``resource_id`` via Control Plane introspect_for_resource.

    Called directly by the UMA data endpoints. On success the introspection
    payload is stored on ``request.state.uma_introspection``, a usage
    observation is emitted, and the payload is returned.

    Raises:
        HTTPException: 503 when no Control Plane base URL is configured, 401
            when no bearer token is present, or the status code carried by
            ``RPTValidationError`` when introspection rejects the token.
    """
    base = get_control_plane_http_base()
    if not base:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="UMA not configured: TOPOS_CONTROL_PLANE_URL required for RPT validation",
        )
    token = _extract_bearer(request)
    if not token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing or invalid Authorization: Bearer <RPT>",
        )
    try:
        payload = await introspect_for_resource(resource_id=resource_id, token=token)
    except RPTValidationError as e:
        # Chain the cause so the original validation failure stays in tracebacks.
        raise HTTPException(status_code=e.status_code, detail=str(e)) from e
    request.state.uma_introspection = payload
    await emit_usage_observation(
        action="uma.permission_ticket.validated",
        quantity=1,
        producer="api.uma_data",
        canonical_action_identity={
            "resource_id": resource_id,
            # Only a SHA-256 digest of the token is reported, never the token itself.
            "rpt_token_sha": hashlib.sha256(token.encode("utf-8")).hexdigest(),
            "scope_count": len(payload.get("allowed_scopes") or []),
        },
        topos_id=parse_dataset_id_from_uma_dataset_resource_id(resource_id),
        trust_class="observe_only",
        metadata={"endpoint": "uma_introspect_for_resource"},
    )
    return payload
207
+
208
+
209
@router.get("/{resource_id}/data/messages")
async def get_uma_messages(
    request: Request,
    resource_id: str,
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    dataset_id: Optional[str] = Query(None),
):
    """
    Serve message rows for a UMA resource, subject to the permission's filters.

    Sprint 05: rows are returned only when the RPT scopes resolve to a
    message-like table (messages, conversation_messages, ai_chat_messages,
    ai_chat or ai_messages); otherwise the response is empty. Rows are paged
    from the database first and the contact pipeline and filter manifest are
    applied to that page afterwards. Filter errors surface as HTTP 422.
    """
    resource_id = resource_id.strip()
    payload = await require_uma_rpt(request, resource_id)
    allowed_scopes = payload.get("allowed_scopes") or []
    allowed_tables = resolve_scopes_to_tables(allowed_scopes)
    if not allowed_tables:
        return {"messages": [], "count": 0}

    message_like_tables = (
        "messages",
        "conversation_messages",
        "ai_chat_messages",
        "ai_chat",
        "ai_messages",
    )
    if not any(may_access_table(allowed_tables, name) for name in message_like_tables):
        return {"messages": [], "count": 0}

    conn = get_db_connection()
    if not conn:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Database not initialized",
        )

    filters = (request.state.uma_introspection or {}).get("filters")
    # Only dict-shaped filters are passed on; anything else is treated as absent.
    filters_dict = filters if isinstance(filters, dict) else None
    manifest = extract_filter_manifest(filters_dict)

    grants_ai = bool(allowed_tables & {"ai_chat_messages", "ai_messages", "ai_chat"})
    grants_conversations = bool(allowed_tables & {"messages", "conversation_messages"})
    # A logical table is named only when exactly one family of tables is granted.
    if grants_ai and not grants_conversations:
        logical_table = "ai_chat_messages"
    elif grants_conversations and not grants_ai:
        logical_table = "conversation_messages"
    else:
        logical_table = None

    limited = get_limit_cap(limit, manifest, logical_table)
    effective_dataset_id = (dataset_id or "").strip() or parse_dataset_id_from_uma_dataset_resource_id(
        resource_id
    )
    items = _get_messages_from_db(conn, effective_dataset_id, limited, offset, allowed_tables=allowed_tables)
    try:
        items, uma_contact_sidecar = apply_message_contact_pipeline(
            items,
            conn=conn,
            dataset_id=effective_dataset_id,
            allowed_scopes=allowed_scopes,
            manifest=manifest,
            filters=filters_dict,
        )
        manifest_for_generic = strip_contact_runtime_filters(manifest)
        fts = extract_field_transforms(filters_dict)
        filtered = apply_filter_manifest(
            list(items),
            manifest_for_generic,
            field_transforms=fts,
            table_id=logical_table,
        )
    except UMAFilterError as exc:
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) from exc

    return {
        "messages": filtered,
        "count": len(filtered),
        "message_owner": uma_contact_sidecar.get("message_owner") or {},
    }
279
+
280
+
281
@router.get("/{resource_id}/data/oplog")
async def get_uma_oplog(
    request: Request,
    resource_id: str,
    limit: int = Query(100, ge=1, le=1000),
    offset: int = Query(0, ge=0),
    dataset_id: Optional[str] = Query(None),
):
    """
    Serve oplog entries for a UMA resource, subject to the permission's filters.

    Sprint 05: entries are returned only when the RPT scopes resolve to at
    least one table (any read access); otherwise the response is empty.
    Filter errors surface as HTTP 422.
    """
    resource_id = resource_id.strip()
    introspection = await require_uma_rpt(request, resource_id)
    granted_scopes = introspection.get("allowed_scopes") or []
    if not resolve_scopes_to_tables(granted_scopes):
        return {"ops": [], "count": 0}

    conn = get_db_connection()
    if not conn:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Database not initialized",
        )

    page = _get_oplog_from_db(conn, dataset_id, limit, offset)
    permission_filters = (request.state.uma_introspection or {}).get("filters")
    try:
        visible_ops = apply_filters(page, permission_filters)
    except UMAFilterError as exc:
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) from exc
    return {"ops": visible_ops, "count": len(visible_ops)}
topos/api/usage.py ADDED
@@ -0,0 +1,49 @@
1
+ """Engine-owned request counts: UMA + MCP. GET /api/request-counts (requires API key)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter, Depends, Query
6
+
7
+ from ..auth import require_api_key
8
+ from ..core.handlers import handle_control_plane_request
9
+
10
+ router = APIRouter(prefix="/api", tags=["usage"])
11
+
12
+
13
@router.get("/request-counts")
async def get_request_counts(
    owner_user_id: str | None = Query(None, description="Resource owner (default: engine's linked user)"),
    since_days: int = Query(90, ge=1, le=365),
    _: None = Depends(require_api_key),  # noqa: B008
) -> dict:
    """
    Report UMA and MCP request counts from the engine's DB.

    Builds a ``get_request_counts`` control-plane message and hands it to
    ``handle_control_plane_request`` (same data as that message type, for
    direct frontend or CP proxy use). When the handler reports an error, a
    zero-filled structure is returned instead of the handler payload.
    """
    import uuid

    request_message = {
        "id": str(uuid.uuid4()),
        "type": "get_request_counts",
        "payload": {"owner_user_id": owner_user_id or "", "since_days": since_days},
    }
    reply = await handle_control_plane_request(request_message)
    if reply.get("status") != "error":
        return reply.get("payload", {})

    # Error path: respond with the same shape, all counters zeroed.
    zeroed_attribution = {
        "window_days": since_days,
        "owner_self_reads": 0,
        "owner_self_writes": 0,
        "grantee_reads": 0,
        "grantee_writes": 0,
        "unknown_reads": 0,
        "unknown_writes": 0,
    }
    zeroed_uma = {
        "total_read_requests": 0,
        "total_write_requests": 0,
        "by_app": [],
        "by_requesting_user": [],
        "access_attribution": zeroed_attribution,
    }
    zeroed_mcp = {"by_source": [], "by_tool": [], "by_access_context": [], "total": 0}
    return {"uma": zeroed_uma, "mcp": zeroed_mcp}
topos/api/user_identity.py ADDED
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import APIRouter, Body, Depends, Query
6
+
7
+ from ..auth import require_api_key
8
+ from ..core.state import get_db_connection
9
+ from ..storage.user_identity import get_user_identity, put_user_identity
10
+
11
+ router = APIRouter(tags=["user-identity"])
12
+
13
+
14
@router.get("/v1/user-identity", dependencies=[Depends(require_api_key)])
async def get_user_identity_endpoint(
    dataset_id: Optional[str] = Query(default=None, description="Dataset scope for owner identity"),
):
    """Look up the stored user identity for ``dataset_id``.

    Problems (missing ``dataset_id``, no database connection) are reported in
    the response body as ``{"status": "error", ...}`` rather than raised. When
    nothing is stored, ``display_name`` is returned as ``None``.
    """
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    conn = get_db_connection()
    if not conn:
        return {"status": "error", "error": "Database not available"}

    stored = get_user_identity(conn, dataset_id)
    identity_fields = {"display_name": None} if stored is None else stored
    return {"status": "ok", "dataset_id": dataset_id, **identity_fields}
27
+
28
+
29
@router.put("/v1/user-identity", dependencies=[Depends(require_api_key)])
async def put_user_identity_endpoint(
    dataset_id: Optional[str] = Query(default=None),
    display_name: Optional[str] = Query(default=None),
    body: Optional[dict] = Body(default=None),
):
    """Store the display name for ``dataset_id``.

    Query parameters take precedence; the JSON body is consulted only for
    values the query string did not supply. String display names are stripped
    and an empty result is stored as ``None``. Problems are reported in the
    response body as ``{"status": "error", ...}`` rather than raised.
    """
    if not dataset_id:
        dataset_id = body.get("dataset_id") if body else None
    if not dataset_id:
        return {"status": "error", "error": "dataset_id required"}

    if display_name is not None:
        next_display_name = display_name
    elif body:
        next_display_name = body.get("display_name")
    else:
        next_display_name = None
    if isinstance(next_display_name, str):
        next_display_name = next_display_name.strip() or None

    conn = get_db_connection()
    if not conn:
        return {"status": "error", "error": "Database not available"}

    put_user_identity(conn, dataset_id, display_name=next_display_name)
    return {"status": "ok", "dataset_id": dataset_id, "display_name": next_display_name}
topos/app.py ADDED
@@ -0,0 +1,239 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import importlib.util
5
+ import logging
6
+ import sys
7
+
8
+ from fastapi import FastAPI
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+
11
+ from .__version__ import __version__
12
+ from .api import (
13
+ analytics as analytics_routes,
14
+ app_registry as app_registry_routes,
15
+ backup as backup_routes,
16
+ compute_remote as compute_remote_routes,
17
+ data_commit as data_commit_routes,
18
+ db as db_routes,
19
+ device as device_routes,
20
+ enrichment as enrichment_routes,
21
+ health as health_routes,
22
+ ingestion_compat as ingestion_compat_routes,
23
+ ingestion_api as ingestion_routes,
24
+ ingestion_sources as ingestion_sources_routes,
25
+ local_mcp as local_mcp_routes,
26
+ llm as llm_routes,
27
+ messenger_analytics as messenger_analytics_routes,
28
+ query_api as query_routes,
29
+ source_install as source_install_routes,
30
+ sources as sources_routes,
31
+ sync as sync_routes,
32
+ uma_data as uma_data_routes,
33
+ user_identity as user_identity_routes,
34
+ usage as usage_routes,
35
+ ui_config as ui_config_routes,
36
+ data_explorer_table_prefs as data_explorer_table_prefs_routes,
37
+ sanitization_ollama_config as sanitization_ollama_config_routes,
38
+ filter_lab as filter_lab_routes,
39
+ )
40
+ from .config.settings import settings
41
+ from .core.logging import configure_logging
42
+ from .core import state
43
+ from .core.handlers import handle_control_plane_request
44
+ from .control_plane_client import ControlPlaneClient
45
+ from .engine.registration import build_engine_heartbeat_message, build_engine_register_message
46
+ from .hosted_pool_lease import HostedPoolLeaseClient
47
+ from .services.container import get_services
48
+ from .sync import SyncClient
49
+ from .sync_handlers import handle_sync_op
50
+
51
configure_logging()
logger = logging.getLogger("topos.app")

app = FastAPI(
    title="Topos",
    description="Topos node: Topos Database (data plane) and Topos Engine (compute plane), typically co-deployed in this process.",
    version=__version__,
)

# Record the effective CORS policy once at import time.
logger.info("CORS allowed origins: %s", settings.allowed_origins)
if settings.allowed_origin_regex:
    logger.info("CORS allowed origin regex: %s", settings.allowed_origin_regex)

app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_origin_regex=settings.allowed_origin_regex,
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# (router, prefix) pairs in registration order; ``None`` means the router is
# mounted without an explicit prefix. The order is kept exactly as listed.
_ROUTER_MOUNTS = (
    (health_routes.router, None),
    (local_mcp_routes.router, None),
    (llm_routes.router, None),
    (db_routes.router, None),
    (sync_routes.router, None),
    (device_routes.router, None),
    (backup_routes.router, None),
    (analytics_routes.router, None),
    (sources_routes.router, None),
    (source_install_routes.router, "/v1"),
    (app_registry_routes.router, None),
    (ingestion_compat_routes.router, None),
    (enrichment_routes.router, "/v1"),
    (ingestion_routes.router, "/v1"),
    (ingestion_sources_routes.router, None),
    (query_routes.router, "/v1"),
    (messenger_analytics_routes.router, "/v1"),
    (uma_data_routes.router, None),
    (usage_routes.router, None),
    (ui_config_routes.router, None),
    (data_explorer_table_prefs_routes.router, None),
    (user_identity_routes.router, None),
    (sanitization_ollama_config_routes.router, None),
    (filter_lab_routes.router, None),
    (compute_remote_routes.router, None),
    (data_commit_routes.router, None),
)

for _router, _prefix in _ROUTER_MOUNTS:
    if _prefix is None:
        app.include_router(_router)
    else:
        app.include_router(_router, prefix=_prefix)
99
+
100
+
101
async def _lease_renew_loop() -> None:
    """Renew the hosted pool lease repeatedly until the task is cancelled.

    A failed renewal is logged and retried after 10 seconds; cancellation is
    re-raised so the task can be stopped.
    """
    while True:
        try:
            current = state.hosted_pool_lease_client.lease
            ttl_seconds = int(current.lease_ttl_seconds) if current else 300
            # Wake up ahead of expiry by the configured skew (at least 5s),
            # but never sleep for less than 15s.
            sleep_seconds = max(
                15,
                ttl_seconds - max(5, int(settings.hosted_pool_lease_renew_skew_seconds)),
            )
            await asyncio.sleep(sleep_seconds)
            renewed = await state.hosted_pool_lease_client.renew()
            logger.debug(
                "Hosted pool lease renewed key=%s... expires_at=%s",
                renewed.connector_key[:8],
                renewed.lease_expires_at.isoformat() if renewed.lease_expires_at else "unknown",
            )
        except asyncio.CancelledError:
            raise
        except Exception as lease_exc:  # noqa: BLE001
            logger.warning("Hosted pool lease renew failed: %s", lease_exc)
            await asyncio.sleep(10.0)


async def _send_presence(build_message, label: str) -> bool:
    """Send one presence message to the control plane.

    ``build_message`` is a zero-argument callable producing the message.
    Returns ``True`` when the message was handed to the client, ``False`` when
    no client is set or the send failed (the failure is logged).
    """
    client = state.control_plane_client
    if not client:
        return False
    try:
        await client.send_message(build_message())
    except asyncio.CancelledError:
        raise
    except Exception as presence_exc:  # noqa: BLE001
        logger.warning("Engine %s message failed: %s", label, presence_exc)
        return False
    return True


async def _presence_loop() -> None:
    """Register the engine once, then send a heartbeat every 30 seconds.

    Registration/heartbeat are unsolicited presence messages. CP may ignore
    them in legacy mode; they are required for split-identity rollout
    scaffolding. Send failures are logged and retried instead of ending the
    task, matching the error handling of the lease renew loop.
    """
    await asyncio.sleep(0.1)
    while not await _send_presence(build_engine_register_message, "register"):
        await asyncio.sleep(1.0)
    while True:
        await asyncio.sleep(30.0)
        await _send_presence(build_engine_heartbeat_message, "heartbeat")


@app.on_event("startup")
async def startup_event() -> None:
    """Initialise services, run startup migrations and start background clients.

    Steps, in order: log runtime details, initialise services unless a
    connection was pre-injected, run the Stage 9 migrations (best-effort),
    connect to the control plane when a URL is configured (issuing a hosted
    pool lease first when enabled), and start the sync client when sync is
    enabled and a user id is set.

    Raises:
        Exception: whatever issuing the hosted pool lease raises; the error is
            logged and re-raised.
    """
    logger.info("Runtime Python executable: %s", sys.executable)
    logger.info(
        "Runtime deps available: transformers=%s torch=%s",
        bool(importlib.util.find_spec("transformers")),
        bool(importlib.util.find_spec("torch")),
    )
    # Tests may inject an in-memory connection before startup.
    # Avoid initializing file-backed services in that case.
    if state.db_conn is None:
        _ = get_services()

    # Run Stage 9 column renames at startup so request handlers never block the event loop on migration.
    try:
        from .core.state import db_conn, get_db_connection
        from .storage.db.migrations.stage9_column_renames import run_stage9_migrations

        # Respect pre-injected test connections; avoid replacing test DB handles during startup.
        conn = db_conn if db_conn is not None else get_db_connection()
        if conn:
            result = run_stage9_migrations(conn)
            if result.get("applied"):
                logger.info("Stage 9 migrations applied at startup: %d renames", len(result["applied"]))
    except Exception as e:
        logger.debug("Stage 9 migrations at startup (non-fatal): %s", e)

    if settings.topos_control_plane_url:
        if settings.hosted_pool_lease_enabled:
            try:
                state.hosted_pool_lease_client = HostedPoolLeaseClient(
                    control_plane_ws_url=settings.topos_control_plane_url
                )
                lease = await state.hosted_pool_lease_client.issue()
                # The leased connector key becomes the key used by the clients below.
                settings.topos_key = lease.connector_key
                logger.info(
                    "Hosted pool lease issued key=%s... ttl=%ss",
                    lease.connector_key[:8],
                    lease.lease_ttl_seconds,
                )
                state.hosted_pool_lease_task = asyncio.create_task(_lease_renew_loop())
            except Exception as lease_exc:  # noqa: BLE001
                logger.error("Hosted pool lease issue failed: %s", lease_exc, exc_info=True)
                raise

        state.control_plane_client = ControlPlaneClient(
            control_plane_url=settings.topos_control_plane_url,
            api_key=str(settings.topos_key or ""),
            handler=handle_control_plane_request,
            verify_ssl=settings.control_plane_verify_ssl,
        )
        state.control_plane_client.start()
        if settings.wait_for_control_plane_on_startup:
            connected = await state.control_plane_client.wait_until_connected(
                timeout_s=settings.connection_readiness_timeout_seconds
            )
            if not connected:
                logger.warning(
                    "Control plane client did not become ready within %.1fs",
                    settings.connection_readiness_timeout_seconds,
                )

        state.engine_presence_task = asyncio.create_task(_presence_loop())

    # NOTE(review): sync setup is assumed to be independent of the control-plane
    # branch above (it reads only sync settings) — confirm the intended nesting.
    if settings.enable_sync and settings.topos_user_id:
        state.sync_client = SyncClient(
            sync_url=settings.get_sync_url(),
            api_key=str(settings.topos_key or ""),
            user_id=settings.topos_user_id,
            dataset_id=f"{settings.topos_user_id}:{settings.topos_default_dataset_id}",
            on_op_received=handle_sync_op,
            verify_ssl=settings.control_plane_verify_ssl,
        )
        state.sync_client.start()
        if settings.wait_for_sync_on_startup:
            connected = await state.sync_client.wait_until_connected(
                timeout_s=settings.connection_readiness_timeout_seconds
            )
            if not connected:
                logger.warning(
                    "Sync client did not become ready within %.1fs",
                    settings.connection_readiness_timeout_seconds,
                )
215
+
216
+
217
async def _cancel_background_task(task: asyncio.Task, label: str) -> None:
    """Cancel ``task`` and wait for it to finish.

    A task that had already failed re-raises its exception when awaited; that
    error is logged here so it cannot abort the rest of the shutdown.
    """
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        pass
    except Exception as task_exc:  # noqa: BLE001
        logger.warning("%s task ended with an error: %s", label, task_exc)


async def _run_shutdown_step(label: str, action) -> None:
    """Await the zero-argument coroutine function ``action``, logging any failure."""
    try:
        await action()
    except asyncio.CancelledError:
        raise
    except Exception as step_exc:  # noqa: BLE001
        logger.warning("Shutdown step failed (%s): %s", label, step_exc, exc_info=True)


@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Stop background tasks and clients started by the startup handler.

    Order: presence task, control plane client, lease renew task, lease
    revocation, sync client. Each step is isolated, so a failure in one is
    logged and the remaining steps still run.
    """
    if state.engine_presence_task:
        await _cancel_background_task(state.engine_presence_task, "Engine presence")
        state.engine_presence_task = None
    if state.control_plane_client:
        await _run_shutdown_step("control plane client stop", state.control_plane_client.stop)
    if state.hosted_pool_lease_task:
        await _cancel_background_task(state.hosted_pool_lease_task, "Hosted pool lease renew")
        state.hosted_pool_lease_task = None
    if state.hosted_pool_lease_client:
        await _run_shutdown_step("hosted pool lease revoke", state.hosted_pool_lease_client.revoke)
        state.hosted_pool_lease_client = None
    if state.sync_client:
        await _run_shutdown_step("sync client stop", state.sync_client.stop)
topos/auth.py ADDED
@@ -0,0 +1,17 @@
1
+ from fastapi import Depends, HTTPException, status
2
+ from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
3
+
4
bearer_scheme = HTTPBearer(auto_error=False)


def require_api_key(credentials: HTTPAuthorizationCredentials = Depends(bearer_scheme)) -> None:
    """Validate incoming Bearer token against TOPOS_KEY.

    Args:
        credentials: Parsed ``Authorization`` header, or ``None`` when the
            bearer scheme did not yield credentials.

    Raises:
        HTTPException: 401 "Missing authorization" when there are no bearer
            credentials; 401 "Invalid authorization token" when the token does
            not match the configured key or no key is configured.
    """
    import hmac

    # Resolve settings at call-time so tests that reload env/modules
    # see the latest TOPOS_KEY value.
    from .config.settings import settings as runtime_settings

    if credentials is None or credentials.scheme.lower() != "bearer":
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Missing authorization")

    expected = str(runtime_settings.topos_key or "")
    presented = credentials.credentials or ""
    # Fail closed when no key is configured, so an empty token can never match
    # an empty key. Compare as bytes in constant time to avoid leaking how much
    # of the token matched.
    token_ok = bool(expected) and hmac.compare_digest(
        presented.encode("utf-8"), expected.encode("utf-8")
    )
    if not token_ok:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid authorization token")
@@ -0,0 +1 @@
1
+ """Canonicalization layer for Topos."""