topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,250 @@
1
+ """Background processing for Filter Lab job groups (serial runs, Ollama pull/cleanup)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import logging
8
+ import time
9
+ from typing import Any, Dict, List, Set
10
+
11
+ from topos.config.sanitization_ollama import resolve_sanitization_ollama_effective
12
+ from topos.config.settings import settings
13
+ from topos.core.state import get_db_connection
14
+ from topos.engine.backends.ollama import OllamaAdapter
15
+ from topos.sanitization.ollama_transforms import apply_text_transform_with_ollama
16
+
17
+ from . import bundles as bundles_mod
18
+ from . import store
19
+
20
+ logger = logging.getLogger("topos.filter_lab.worker")
21
+
22
+
23
def _protection_tags(eff: Any) -> Set[str]:
    """Collect the non-blank model tags named by an effective Ollama config.

    Gathers ``eff.default_model`` plus every truthy value of ``eff.models`` and
    returns them as stripped strings. Entries that are falsy, or blank once
    stripped, are left out.
    """
    candidates = [eff.default_model]
    candidates.extend(tag for tag in eff.models.values() if tag)

    tags: Set[str] = set()
    for candidate in candidates:
        if not candidate:
            continue
        cleaned = str(candidate).strip()
        if cleaned:
            tags.add(cleaned)
    return tags
27
+
28
+
29
def _input_hash(text: str) -> str:
    """Return the first 24 hex characters of the SHA-256 digest of *text* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()[:24]
31
+
32
+
33
def _ensure_model_pulled(
    adapter: OllamaAdapter,
    model_tag: str,
    baseline: Set[str],
    pulled: List[str],
    conn: Any,
    group_id: str,
) -> None:
    """Pull *model_tag* through *adapter* unless it is already accounted for.

    Nothing happens when the tag is in *baseline* or already listed in
    *pulled*. Otherwise the model is pulled, the tag is appended to *pulled*
    (the caller's list is mutated in place), and a copy of the updated list
    plus a ``"pull"`` model event are written for *group_id* through ``store``.
    """
    already_accounted_for = model_tag in baseline or model_tag in pulled
    if not already_accounted_for:
        adapter.pull_model(model_tag)
        pulled.append(model_tag)
        store.set_group_pulled_models(conn, group_id, [*pulled])
        store.insert_model_event(conn, group_id, "pull", model_tag)
49
+
50
+
51
def _cleanup_ephemeral(
    adapter: OllamaAdapter,
    pulled: List[str],
    baseline: Set[str],
    protected: Set[str],
    conn: Any,
    group_id: str,
) -> None:
    """Delete the models listed in *pulled*, sparing baseline and protected tags.

    Each tag is handled independently: when the delete call or the ``"delete"``
    event insert raises, a warning is logged and the loop moves on to the next
    tag.
    """
    # Lazy generator: the filter and the delete stay interleaved per tag.
    removable = (tag for tag in pulled if tag not in baseline and tag not in protected)
    for tag in removable:
        try:
            adapter.delete_model(tag)
            store.insert_model_event(conn, group_id, "delete", tag)
        except Exception as exc:  # noqa: BLE001
            logger.warning("filter_lab cleanup delete failed for %s: %s", tag, exc)
69
+
70
+
71
def _lab_option_overrides(options_json: Any, eff: Any) -> Dict[str, Any]:
    """Translate a group's stored ``options_json`` into effective-config overrides.

    Returns a dict with ``timeout_sec`` and ``max_input_chars``. A value from
    the options is used only when it has the expected numeric type; otherwise
    the matching attribute of *eff* is kept. Malformed JSON, a payload that is
    not text, and JSON that is not an object all count as "no options".
    """
    try:
        opts = json.loads(options_json or "{}")
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        opts = {}
    if not isinstance(opts, dict):
        # e.g. "[]" or "3": valid JSON, but there is nothing to look up by key.
        opts = {}

    timeout = opts.get("timeout_sec")
    max_chars = opts.get("max_input_chars")
    # bool is a subclass of int; a stray true/false must not turn into a
    # 1-second timeout or a 1-character input limit.
    use_timeout = isinstance(timeout, (int, float)) and not isinstance(timeout, bool)
    use_max_chars = isinstance(max_chars, int) and not isinstance(max_chars, bool)
    return {
        "timeout_sec": float(timeout) if use_timeout else eff.timeout_sec,
        "max_input_chars": int(max_chars) if use_max_chars else eff.max_input_chars,
    }


def process_job_group_sync(group_id: str) -> None:
    """Execute all queued runs for a group serially; policy B cleanup on terminal state.

    Flow:
      1. Load the group; return immediately if there is no DB connection, no
         such group, or the group is already completed / failed / cancelled.
      2. Resolve the effective sanitization Ollama config (the group is marked
         ``failed`` if that raises), force ``enabled`` on for the Lab, and
         apply the per-group ``timeout_sec`` / ``max_input_chars`` options.
      3. Mark the group ``running`` and walk its runs in order. Only runs whose
         status is ``queued`` are processed; the group status is re-read before
         every run so a cancellation takes effect between runs.
      4. After the loop, cancel any still-queued runs if the group was
         cancelled, otherwise mark the group ``completed``.
      5. In all cases where an adapter was created, delete the models this
         group pulled that are neither baseline nor protected.

    Individual run failures are recorded on the run (``RUN_ERROR``,
    ``UNKNOWN_RECORD``, ``INPUT_TOO_LARGE``) and do not stop the group.
    """
    conn = get_db_connection()
    if not conn:
        logger.error("filter_lab: no DB for group %s", group_id)
        return

    row = store.get_group(conn, group_id)
    if not row:
        return
    group = dict(row)
    if group["status"] in ("completed", "failed", "cancelled"):
        # Terminal groups are never re-processed.
        return

    adapter: OllamaAdapter | None = None
    pulled: List[str] = []
    baseline_set: Set[str] = set()

    try:
        try:
            eff = resolve_sanitization_ollama_effective(settings, conn)
        except Exception as exc:  # noqa: BLE001
            logger.error("filter_lab: effective config failed: %s", exc)
            store.update_group_status(conn, group_id, "failed")
            return

        # Pipeline "sanitization Ollama enabled" can be off while the user still wants Lab eval.
        # Use the same host / models / limits; only skip blocking on eff.enabled.
        if not eff.enabled:
            logger.info(
                "filter_lab: sanitization pipeline is disabled in config; running Lab eval to Ollama anyway (group=%s)",
                group_id,
            )
        eff_for_lab = eff.model_copy(update={"enabled": True})
        eff_merged = eff_for_lab.model_copy(
            update=_lab_option_overrides(group.get("options_json"), eff)
        )

        bundle = bundles_mod.get_bundle(group["bundle_id"])
        if not bundle:
            store.update_group_status(conn, group_id, "failed")
            return

        baseline_set = set(json.loads(group.get("baseline_models_json") or "[]"))
        pulled = list(json.loads(group.get("pulled_models_json") or "[]"))
        adapter = OllamaAdapter(base_url=eff_merged.host)

        store.update_group_status(conn, group_id, "running")

        for run_row in store.list_runs(conn, group_id):
            run = dict(run_row)
            if run["status"] != "queued":
                continue

            # Re-read the group so a cancel issued mid-batch is honoured
            # before the next run starts.
            current = dict(store.get_group(conn, group_id) or {})
            if current.get("status") == "cancelled":
                store.update_run(
                    conn,
                    run["id"],
                    status="cancelled",
                    finished_at=store.utc_now_iso(),
                )
                continue

            rec_id = run["record_id"]
            model_tag = run["model_tag"]
            record = next(
                (r for r in (bundle.get("records") or []) if str(r.get("id")) == rec_id),
                None,
            )
            if not record:
                store.update_run(
                    conn,
                    run["id"],
                    status="failed",
                    finished_at=store.utc_now_iso(),
                    error_code="UNKNOWN_RECORD",
                )
                continue

            text = bundles_mod.record_text(record)
            max_c = eff_merged.max_input_chars
            if max_c > 0 and len(text) > max_c:
                # Keep slightly more than the limit so the stored sample shows
                # that the input overran it.
                store.update_run(
                    conn,
                    run["id"],
                    status="failed",
                    finished_at=store.utc_now_iso(),
                    error_code="INPUT_TOO_LARGE",
                    input_hash=_input_hash(text),
                    input_text=text[: max_c + 50],
                )
                continue

            store.update_run(
                conn,
                run["id"],
                status="running",
                started_at=store.utc_now_iso(),
                input_hash=_input_hash(text),
                input_text=text[:8000],
            )

            try:
                _ensure_model_pulled(adapter, model_tag, baseline_set, pulled, conn, group_id)
                t0 = time.perf_counter()
                out = apply_text_transform_with_ollama(
                    text,
                    group["filter_id"],
                    None,
                    effective=eff_merged,
                    model_override=model_tag,
                )
                ms = int((time.perf_counter() - t0) * 1000)
                store.update_run(
                    conn,
                    run["id"],
                    status="succeeded",
                    finished_at=store.utc_now_iso(),
                    latency_ms=ms,
                    output_text=out,
                    metrics_json=json.dumps({"input_chars": len(text), "output_chars": len(out)}),
                )
            except Exception as exc:  # noqa: BLE001
                # A failing run (pull error, transform error, ...) is recorded
                # on the run itself; the remaining runs still execute.
                logger.warning("filter_lab run failed: %s", exc)
                store.update_run(
                    conn,
                    run["id"],
                    status="failed",
                    finished_at=store.utc_now_iso(),
                    error_code="RUN_ERROR",
                    output_text=str(exc)[:2000],
                )

        final = dict(store.get_group(conn, group_id) or {})
        if final.get("status") == "cancelled":
            for leftover in store.list_runs(conn, group_id):
                leftover_run = dict(leftover)
                if leftover_run["status"] == "queued":
                    store.update_run(
                        conn,
                        leftover_run["id"],
                        status="cancelled",
                        finished_at=store.utc_now_iso(),
                    )
        else:
            store.update_group_status(conn, group_id, "completed")

    finally:
        if adapter and conn:
            try:
                # Work from the persisted lists and a freshly resolved config
                # rather than the in-memory values used during the runs.
                row_f = store.get_group(conn, group_id)
                if row_f:
                    group_f = dict(row_f)
                    pulled_f = list(json.loads(group_f.get("pulled_models_json") or "[]"))
                    baseline_f = set(json.loads(group_f.get("baseline_models_json") or "[]"))
                    eff2 = resolve_sanitization_ollama_effective(settings, conn)
                    prot2 = _protection_tags(eff2)
                    _cleanup_ephemeral(adapter, pulled_f, baseline_f, prot2, conn, group_id)
            except Exception as exc:  # noqa: BLE001
                logger.warning("filter_lab cleanup skipped: %s", exc)
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ from dataclasses import dataclass
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict, Optional
9
+ from urllib.parse import urlparse
10
+
11
+ import httpx
12
+
13
+ from .config.settings import settings
14
+
15
+ logger = logging.getLogger("topos.hosted_pool_lease")
16
+
17
+
18
def _parse_iso_datetime(value: Any) -> Optional[datetime]:
    """Parse an ISO-format timestamp into a timezone-aware UTC ``datetime``.

    *value* is stringified and stripped first. ``Z`` is rewritten to
    ``+00:00`` before parsing, a naive result is taken to be UTC, and an aware
    result is converted to UTC. Returns ``None`` for empty input or anything
    that fails to parse.
    """
    text = str(value or "").strip()
    if text:
        try:
            moment = datetime.fromisoformat(text.replace("Z", "+00:00"))
            aware = moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)
            return aware.astimezone(timezone.utc)
        except Exception:  # noqa: BLE001
            return None
    return None
29
+
30
+
31
def _control_plane_http_base(ws_url: str) -> str:
    """Derive the HTTP(S) origin that matches a control plane websocket URL.

    ``ws`` maps to ``http`` and ``wss`` to ``https``; only the scheme and
    network location are kept (path, query and fragment are dropped).

    Raises:
        ValueError: if the scheme is not ``ws``/``wss`` or the URL has no host.
    """
    parts = urlparse(ws_url)
    http_scheme = {"ws": "http", "wss": "https"}.get(parts.scheme)
    if http_scheme is None:
        raise ValueError(f"Unsupported control plane websocket URL scheme: {parts.scheme}")
    if not parts.netloc:
        raise ValueError("Control plane websocket URL missing host")
    return f"{http_scheme}://{parts.netloc}"
40
+
41
+
42
def _metadata_identity_token(audience: str) -> str:
    """Fetch an identity token for *audience* from the Google metadata server.

    Performs a blocking GET (5 second timeout) against the default service
    account's ``identity`` endpoint with ``format=full``.

    Raises:
        httpx.HTTPStatusError: on a non-success HTTP status.
        RuntimeError: if the response body is empty after stripping.
    """
    with httpx.Client(timeout=5.0) as client:
        resp = client.get(
            "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/identity",
            params={"audience": audience, "format": "full"},
            headers={"Metadata-Flavor": "Google"},
        )
        resp.raise_for_status()
        token = str(resp.text or "").strip()
    if token:
        return token
    raise RuntimeError("Metadata server returned an empty identity token")
55
+
56
+
57
@dataclass
class HostedPoolLease:
    """Lease state held by ``HostedPoolLeaseClient`` after an issue or renew call."""

    # Key identifying the leased connector; echoed back on renew and revoke.
    connector_key: str
    # Parsed expiry reported by the control plane, or None when absent/unparseable.
    lease_expires_at: Optional[datetime]
    # Lease lifetime in seconds as stored by the client.
    lease_ttl_seconds: int
62
+
63
+
64
class HostedPoolLeaseClient:
    """Issue, renew and revoke a hosted pool connector lease over HTTP.

    The HTTP base is derived from the control plane websocket URL, endpoint
    paths and the pool group come from ``settings`` (with built-in defaults),
    and every request carries a bearer identity token obtained from the
    metadata server for the configured audience. The most recent lease is kept
    on ``self.lease``.
    """

    def __init__(self, *, control_plane_ws_url: str) -> None:
        """Resolve endpoints and instance identity.

        Raises:
            ValueError: if *control_plane_ws_url* is not a usable ws/wss URL.
        """
        self.control_plane_ws_url = control_plane_ws_url
        self.control_plane_http_base = _control_plane_http_base(control_plane_ws_url)
        self.issue_path = str(settings.hosted_pool_lease_issue_path or "/v1/system/pool-connectors/lease/issue")
        self.renew_path = str(settings.hosted_pool_lease_renew_path or "/v1/system/pool-connectors/lease/renew")
        self.revoke_path = str(settings.hosted_pool_lease_revoke_path or "/v1/system/pool-connectors/lease/revoke")
        self.pool_group = str(settings.hosted_pool_lease_pool_group or "default")
        # Identity fields are read from the process environment.
        self.instance_id = str(os.getenv("HOSTNAME") or "unknown-instance").strip()
        self.service_name = str(os.getenv("K_SERVICE") or "unknown-service").strip()
        self.revision = str(os.getenv("K_REVISION") or "").strip() or None
        self.lease: Optional[HostedPoolLease] = None

    def _build_url(self, path: str) -> str:
        """Join *path* onto the HTTP base with exactly one leading slash."""
        normalized = f"/{str(path or '').lstrip('/')}"
        return f"{self.control_plane_http_base}{normalized}"

    def _audience(self) -> str:
        """Return the configured token audience, defaulting to the HTTP base."""
        configured = str(settings.hosted_pool_lease_audience or "").strip()
        return configured or self.control_plane_http_base

    def _identity_token(self) -> str:
        """Fetch an identity token for this client's audience (blocking)."""
        return _metadata_identity_token(self._audience())

    async def _authorized_post(self, path: str, payload: Dict[str, Any]) -> Any:
        """POST *payload* as JSON to *path* with a fresh bearer token.

        The blocking token fetch runs in a worker thread. Returns the HTTP
        response; raises on token failure, transport failure or a non-success
        status.
        """
        url = self._build_url(path)
        token = await asyncio.to_thread(self._identity_token)
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.post(url, headers={"Authorization": f"Bearer {token}"}, json=payload)
            resp.raise_for_status()
        return resp

    async def issue(self) -> HostedPoolLease:
        """Request a new lease, store it on ``self.lease`` and return it.

        Raises:
            RuntimeError: if the response carries no ``connector_key``.
        """
        payload: Dict[str, Any] = {
            "service_name": self.service_name,
            "revision": self.revision,
            "instance_id": self.instance_id,
            "pool_group": self.pool_group,
        }
        resp = await self._authorized_post(self.issue_path, payload)
        body = resp.json()
        connector_key = str(body.get("connector_key") or "").strip()
        if not connector_key:
            raise RuntimeError("Lease issue response missing connector_key")
        ttl = int(body.get("lease_ttl_seconds") or 300)
        lease = HostedPoolLease(
            connector_key=connector_key,
            lease_expires_at=_parse_iso_datetime(body.get("lease_expires_at")),
            # Never store a TTL below 30 seconds.
            lease_ttl_seconds=max(30, ttl),
        )
        self.lease = lease
        return lease

    async def renew(self) -> HostedPoolLease:
        """Renew the current lease, or issue one if none is held.

        The connector key is carried over from the existing lease; expiry and
        TTL are refreshed from the response (TTL falls back to the previous
        value when the response omits it).
        """
        if not self.lease:
            return await self.issue()
        payload = {
            "connector_key": self.lease.connector_key,
            "service_name": self.service_name,
            "revision": self.revision,
            "instance_id": self.instance_id,
        }
        resp = await self._authorized_post(self.renew_path, payload)
        body = resp.json()
        ttl = int(body.get("lease_ttl_seconds") or self.lease.lease_ttl_seconds)
        self.lease = HostedPoolLease(
            connector_key=self.lease.connector_key,
            lease_expires_at=_parse_iso_datetime(body.get("lease_expires_at")),
            lease_ttl_seconds=max(30, ttl),
        )
        return self.lease

    async def revoke(self) -> None:
        """Best-effort revoke of the current lease; always forgets it locally.

        Any failure -- including failure to obtain the identity token -- is
        logged as a warning and swallowed, and ``self.lease`` is cleared
        regardless of the outcome.
        """
        if not self.lease:
            return
        payload = {
            "connector_key": self.lease.connector_key,
            "service_name": self.service_name,
            "instance_id": self.instance_id,
        }
        try:
            # The token fetch happens inside the guarded region so that a
            # metadata-server failure cannot escape or leave the lease set.
            await self._authorized_post(self.revoke_path, payload)
        except Exception as exc:  # noqa: BLE001
            logger.warning("Hosted pool lease revoke failed: %s", exc)
        finally:
            self.lease = None
@@ -0,0 +1 @@
1
+ """Ingestion layer for Topos."""
@@ -0,0 +1,6 @@
1
+ """Checkpoint storage abstractions."""
2
+
3
+ from .checkpoint_store import CheckpointStore, IngestionCheckpoint
4
+ from .sqlite_checkpoint_store import SqliteCheckpointStore, ensure_table
5
+
6
+ __all__ = ["CheckpointStore", "IngestionCheckpoint", "SqliteCheckpointStore", "ensure_table"]
@@ -0,0 +1,24 @@
1
+ """Checkpoint store contract for ingestion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Dict, Optional
7
+
8
+
9
@dataclass(frozen=True)
class IngestionCheckpoint:
    """Immutable checkpoint record for one (dataset, schema) pair."""

    # Dataset the checkpoint belongs to.
    dataset_id: str
    # Schema the checkpoint belongs to.
    schema_id: str
    # Identifier of the last record covered by this checkpoint.
    last_record_id: str
    # Free-form string metadata stored alongside the checkpoint.
    metadata: Dict[str, str]
15
+
16
+
17
class CheckpointStore:
    """Persist and retrieve ingestion checkpoints.

    Base contract only: both methods raise ``NotImplementedError`` and are
    meant to be overridden by concrete stores.
    """

    def get_checkpoint(self, dataset_id: str, schema_id: str) -> Optional[IngestionCheckpoint]:
        """Return the checkpoint stored for *dataset_id* / *schema_id*, if any."""
        raise NotImplementedError

    def save_checkpoint(self, checkpoint: IngestionCheckpoint) -> None:
        """Persist *checkpoint*."""
        raise NotImplementedError
@@ -0,0 +1,82 @@
1
+ """SQLite-backed checkpoint store for ingestion (e.g. iMessage/Signal sync)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ from typing import Optional
8
+
9
+ from .checkpoint_store import CheckpointStore, IngestionCheckpoint
10
+
11
+ logger = logging.getLogger("topos.ingestion.checkpoints.sqlite")
12
+
13
+ TABLE = "ingestion_checkpoints"
14
+
15
+
16
def ensure_table(conn) -> None:
    """Create the ingestion checkpoints table when it is missing, then commit.

    The table is keyed on (dataset_id, schema_id), and ``updated_at`` defaults
    to the current time as computed by SQLite.
    """
    create_sql = f"""
        CREATE TABLE IF NOT EXISTS {TABLE} (
            dataset_id TEXT NOT NULL,
            schema_id TEXT NOT NULL,
            last_record_id TEXT NOT NULL,
            metadata_json TEXT,
            updated_at TEXT DEFAULT (datetime('now')),
            PRIMARY KEY (dataset_id, schema_id)
        )
    """
    conn.execute(create_sql)
    conn.commit()
29
+
30
+
31
class SqliteCheckpointStore(CheckpointStore):
    """Checkpoint store backed by a SQLite connection supplied by the caller.

    Every operation is best-effort: with a falsy connection the methods do
    nothing, and any exception raised while reading or writing is logged as a
    warning rather than propagated.
    """

    def __init__(self, conn) -> None:
        """Keep *conn* and make sure the checkpoint table exists when it is usable."""
        self.conn = conn
        if conn:
            ensure_table(conn)

    @staticmethod
    def _decode_metadata(metadata_json) -> dict:
        """Decode the stored metadata JSON; empty or undecodable input gives ``{}``."""
        if not metadata_json:
            return {}
        try:
            return json.loads(metadata_json)
        except (TypeError, json.JSONDecodeError):
            return {}

    def get_checkpoint(self, dataset_id: str, schema_id: str) -> Optional[IngestionCheckpoint]:
        """Load the checkpoint for *dataset_id* / *schema_id*.

        Returns ``None`` when there is no connection, no matching row, or the
        lookup fails. A falsy stored ``last_record_id`` is reported as ``"0"``.
        """
        if not self.conn:
            return None
        try:
            ensure_table(self.conn)
            cursor = self.conn.execute(
                f"SELECT last_record_id, metadata_json FROM {TABLE} WHERE dataset_id = ? AND schema_id = ?",
                (dataset_id, schema_id),
            )
            found = cursor.fetchone()
            if not found:
                return None
            stored_record_id, stored_metadata = found
            return IngestionCheckpoint(
                dataset_id=dataset_id,
                schema_id=schema_id,
                last_record_id=stored_record_id or "0",
                metadata=self._decode_metadata(stored_metadata),
            )
        except Exception as e:
            logger.warning("get_checkpoint failed: %s", e)
            return None

    def save_checkpoint(self, checkpoint: IngestionCheckpoint) -> None:
        """Insert or replace the row for *checkpoint* and commit.

        Empty metadata is stored as NULL; otherwise it is serialized as JSON
        without ASCII escaping. Failures are logged and swallowed.
        """
        if not self.conn:
            return
        try:
            ensure_table(self.conn)
            if checkpoint.metadata:
                metadata_json = json.dumps(checkpoint.metadata, ensure_ascii=False)
            else:
                metadata_json = None
            params = (
                checkpoint.dataset_id,
                checkpoint.schema_id,
                checkpoint.last_record_id,
                metadata_json,
            )
            self.conn.execute(
                f"""
                INSERT OR REPLACE INTO {TABLE} (dataset_id, schema_id, last_record_id, metadata_json, updated_at)
                VALUES (?, ?, ?, ?, datetime('now'))
                """,
                params,
            )
            self.conn.commit()
        except Exception as e:
            logger.warning("save_checkpoint failed: %s", e)