topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,276 @@
1
+ """
2
+ v1 LLM-backed field transforms via Ollama (e.g. llama3.2).
3
+
4
+ Configuration (precedence: device DB overrides → Settings / env → safe defaults):
5
+ - File/env: see `topos.config.settings` (SANITIZATION_OLLAMA_*).
6
+ - Device: `engine_config` key `sanitization_ollama_device` (JSON), via PUT /v1/sanitization-ollama-config.
7
+
8
+ If /api/chat returns 404 with a model-not-found error and SANITIZATION_OLLAMA_AUTO_PULL is true (default),
9
+ the engine pulls the model via Ollama /api/pull (see OllamaAdapter.ensure_model) then retries chat once.
10
+
11
+ Fail-open: on error, callers keep the original text.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import logging
18
+ import re
19
+ import threading
20
+ from contextlib import contextmanager
21
+ from typing import Any, Dict, Final, Iterator, List, Optional, Tuple
22
+
23
+ from topos.config.sanitization_ollama import (
24
+ SANITIZATION_OLLAMA_TRANSFORM_IDS,
25
+ SanitizationOllamaEffective,
26
+ resolve_sanitization_ollama_effective,
27
+ )
28
+
29
+ logger = logging.getLogger("topos.sanitization.ollama")
30
+
31
# One lock per (host, model) so concurrent callers that hit the same missing
# model queue up behind a single pull; the registry lock guards the dict itself.
_pull_locks: Dict[Tuple[str, str], threading.Lock] = {}
_pull_registry_lock = threading.Lock()


@contextmanager
def _serialized_ollama_pull(host: str, model: str) -> Iterator[None]:
    """
    Hold the per-(host, model) pull lock for the duration of the `with` body.

    The host is normalised by dropping trailing slashes, so "http://h" and
    "http://h/" share one lock. Locks are created on first use and kept in
    `_pull_locks` afterwards.
    """
    registry_key = (host.rstrip("/"), model)
    with _pull_registry_lock:
        pull_lock = _pull_locks.get(registry_key)
        if pull_lock is None:
            pull_lock = threading.Lock()
            _pull_locks[registry_key] = pull_lock
    # Registry lock is released before waiting on the per-model lock.
    with pull_lock:
        yield
46
+
47
def _response_suggests_missing_ollama_model(response: Any) -> bool:
    """
    Guess whether `response` is Ollama's "unknown model" reply.

    Returns True only for a 404 whose error text contains both "model" and
    "not found" (case-insensitive). The text comes from the JSON `error` field
    when the body parses to a dict, or from `response.text` when parsing fails.
    """
    if getattr(response, "status_code", None) == 404:
        message = ""
        try:
            payload = response.json()
            if isinstance(payload, dict):
                message = str(payload.get("error") or "")
        except Exception:
            # Body was not JSON (or could not be read as such): use the raw text.
            message = str(getattr(response, "text", None) or "")
        lowered = message.lower()
        return "model" in lowered and "not found" in lowered
    return False
60
+
61
# Backward-compatible alias for SANITIZATION_OLLAMA_TRANSFORM_IDS (imported from
# topos.config.sanitization_ollama).
OLLAMA_TRANSFORM_IDS: Final[tuple[str, ...]] = SANITIZATION_OLLAMA_TRANSFORM_IDS

# System message sent as the first chat message for every transform
# (see _build_messages); asks the model for bare output without fences or preamble.
_SYSTEM_STRICT: Final[str] = (
    "You are a precise text processor for a privacy-preserving data export pipeline. "
    "Follow instructions exactly. Output ONLY the requested result with no markdown fences, "
    "no preamble, and no explanation unless the format explicitly asks for structured fields."
)
69
+
70
+
71
def _user_pii_redaction(text: str) -> str:
    """Return the user prompt that asks the model to swap PII spans in `text` for bracketed tokens."""
    instructions = "\n".join(
        (
            "Redact personally identifiable information in the following text.",
            "Replace each span of PII with a bracketed token: [NAME], [EMAIL], [PHONE], [ADDRESS], [ID], [URL], [DATE_OF_BIRTH], [ACCOUNT], [OTHER_PII].",
            "Preserve structure (paragraphs, newlines) and non-PII wording. Do not invent content.",
        )
    )
    # The input is framed by `---` rules so the model can tell it from the instructions.
    return f"{instructions}\n\nTEXT:\n---\n{text}\n---\nOUTPUT (redacted text only):"
81
+
82
+
83
def _user_nsfw_sanitization(text: str) -> str:
    """Return the user prompt that asks the model to replace explicit/violent/illegal content in `text` with [REMOVED]."""
    prompt_lines = [
        "Sanitize the following text for a general audience: remove or replace sexually explicit, graphically violent, or illegal-content descriptions with [REMOVED].",
        "Keep the rest of the message meaning where possible. Do not add commentary.",
        "",
        "TEXT:",
        "---",
        f"{text}",
        "---",
        "OUTPUT (sanitized text only):",
    ]
    return "\n".join(prompt_lines)
92
+
93
+
94
def _user_raw_to_summary(text: str, params: Dict[str, Any]) -> str:
    """
    Return the user prompt that asks the model for a one-paragraph summary of `text`.

    Recognised `params` keys:
    - "style": tone named in the prompt; blank or missing falls back to "neutral".
    - "max_length": positive int; adds an approximate word cap to the prompt.
      Anything else (including booleans) is ignored.

    A `None` params mapping is treated as empty.
    """
    params = params or {}
    style = str(params.get("style") or "neutral").strip() or "neutral"
    max_len = params.get("max_length")
    cap = ""
    # bool is a subclass of int: without the explicit exclusion, max_length=True
    # would produce the nonsensical "about True words".
    if isinstance(max_len, int) and not isinstance(max_len, bool) and max_len > 0:
        cap = f" Maximum length: about {max_len} words."
    return f"""Summarize the following text in a {style} tone.{cap}
Output a single concise paragraph.

TEXT:
---
{text}
---
OUTPUT (summary only):"""
108
+
109
+
110
def _user_raw_to_sentiment(text: str, params: Dict[str, Any]) -> str:
    """
    Return the user prompt that asks the model for a one-line JSON sentiment verdict on `text`.

    `params["scale"]` names the scale quoted in the prompt; blank or missing falls
    back to "ternary". The reply format requested is {"label": ..., "confidence": ...}.
    """
    requested_scale = str(params.get("scale") or "ternary").strip()
    scale = requested_scale if requested_scale else "ternary"
    instructions = "\n".join(
        (
            f'Classify the overall sentiment of the text on scale "{scale}".',
            'Respond with EXACTLY one line of JSON (no markdown) with keys: "label" (string) and "confidence" (number 0-1).',
            'Example: {"label":"positive","confidence":0.71}',
        )
    )
    return f"{instructions}\n\nTEXT:\n---\n{text}\n---\nOUTPUT (one JSON line only):"
121
+
122
+
123
def _user_third_party_anonymization(text: str, params: Dict[str, Any]) -> str:
    """
    Return the user prompt that asks the model to replace third-party names in `text` with numbered tokens.

    `params["mode"]` is echoed into the prompt; blank or missing falls back to "replace".
    """
    chosen_mode = str(params.get("mode") or "replace").strip()
    mode = chosen_mode if chosen_mode else "replace"
    prompt_lines = [
        "Anonymize third-party identities in the text (other people, companies, or clients mentioned by name).",
        f"Mode: {mode}. Replace identifiable third-party names with [PERSON_1], [ORG_1], etc. Keep the narrator's own voice and first-person references unchanged if clearly \"I/me/my\".",
        'Do not remove non-identifying roles ("my therapist", "a colleague") unless they include a name.',
        "",
        "TEXT:",
        "---",
        f"{text}",
        "---",
        "OUTPUT (anonymized text only):",
    ]
    return "\n".join(prompt_lines)
134
+
135
+
136
def _user_name_removal(text: str) -> str:
    """Return the user prompt that asks the model to replace personal names in `text` with [NAME]."""
    instructions = (
        "Remove personal names (people and well-known public figures) from the text. Replace each removed name with [NAME].\n"
        "Keep dates, numbers, and non-name entities. Preserve readability."
    )
    return f"{instructions}\n\nTEXT:\n---\n{text}\n---\nOUTPUT (text only):"
145
+
146
+
147
def _user_contact_removal(text: str) -> str:
    """Return the user prompt that asks the model to replace contact details in `text` with [CONTACT]."""
    instructions = (
        "Remove contact details: email addresses, phone numbers, street addresses, social handles (@user), and messaging IDs.\n"
        "Replace each removed span with [CONTACT]. Keep the rest of the message."
    )
    return f"{instructions}\n\nTEXT:\n---\n{text}\n---\nOUTPUT (text only):"
156
+
157
+
158
def _build_messages(transform_id: str, text: str, params: Dict[str, Any]) -> List[Dict[str, str]]:
    """
    Assemble the two-message chat (system + user) for one transform.

    The user message comes from the prompt builder matching `transform_id`;
    `params` (None is treated as empty) is forwarded only to builders that take it.

    Raises:
        ValueError: if no prompt builder exists for `transform_id`.
    """
    params = params or {}
    if transform_id == "pii_redaction":
        user_prompt = _user_pii_redaction(text)
    elif transform_id == "nsfw_sanitization":
        user_prompt = _user_nsfw_sanitization(text)
    elif transform_id == "raw_to_summary":
        user_prompt = _user_raw_to_summary(text, params)
    elif transform_id == "raw_to_sentiment":
        user_prompt = _user_raw_to_sentiment(text, params)
    elif transform_id == "third_party_anonymization":
        user_prompt = _user_third_party_anonymization(text, params)
    elif transform_id == "name_removal":
        user_prompt = _user_name_removal(text)
    elif transform_id == "contact_removal":
        user_prompt = _user_contact_removal(text)
    else:
        raise ValueError(f"No Ollama prompt builder for transform_id={transform_id!r}")

    system_message = {"role": "system", "content": _SYSTEM_STRICT}
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]
177
+
178
+
179
def ollama_sanitization_enabled() -> bool:
    """Report the `enabled` flag of the effective config resolved from Settings and the device DB connection."""
    # Function-scope imports, as in the rest of this module's runtime helpers.
    from topos.config.settings import settings
    from topos.core.state import get_db_connection

    connection = get_db_connection()
    effective_config = resolve_sanitization_ollama_effective(settings, connection)
    return effective_config.enabled
185
+
186
+
187
def _ollama_chat(
    messages: List[Dict[str, str]],
    *,
    host: str,
    model: str,
    timeout_sec: float,
    auto_pull: bool = True,
) -> str:
    """
    POST `messages` to Ollama's /api/chat (non-streaming) and return the stripped reply text.

    When `auto_pull` is true and the first response is a 404 that
    `_response_suggests_missing_ollama_model` recognises, the model is pulled via
    `OllamaAdapter.ensure_model` while holding the per-(host, model) pull lock,
    and the chat request is sent a second time.

    Raises:
        Whatever `raise_for_status()` raises for an error status on the final response.
        ValueError: if the JSON body has no string `message.content`.
    """
    # Function-scope import: httpx is loaded on first call, not at module import.
    import httpx

    url = f"{host.rstrip('/')}/api/chat"
    body = {"model": model, "messages": messages, "stream": False}
    with httpx.Client(timeout=timeout_sec) as client:
        resp = client.post(url, json=body)
        if (
            auto_pull
            and resp.status_code == 404
            and _response_suggests_missing_ollama_model(resp)
        ):
            logger.info(
                "Ollama model %r not available at %s; pulling via /api/pull then retrying /api/chat",
                model,
                host.rstrip("/"),
            )
            # Only one pull per (host, model) runs at a time; other callers wait here.
            with _serialized_ollama_pull(host, model):
                from topos.engine.backends.ollama import OllamaAdapter

                pulled = OllamaAdapter(base_url=host).ensure_model(model)
                if pulled:
                    logger.info("Sanitization: finished pulling Ollama model %r", model)
            # NOTE(review): nesting reconstructed from a flattened listing — confirm the
            # retry runs after the pull lock is released and regardless of `pulled`.
            resp = client.post(url, json=body)
    resp.raise_for_status()
    data = resp.json()
    # `message` may be absent or null; fall back to an empty dict so the check below fires.
    msg = data.get("message") or {}
    content = msg.get("content")
    if not isinstance(content, str):
        raise ValueError("Ollama response missing message.content")
    return content.strip()
225
+
226
+
227
def _strip_fences(s: str) -> str:
    """
    Remove one surrounding Markdown code fence from a model reply.

    If the whole (stripped) string is wrapped in triple backticks, the inner
    content is returned stripped, minus an optional language tag after the
    opening fence. Anything else is returned stripped but otherwise unchanged.
    """
    s = s.strip()
    # A word after the opening fence counts as a language tag only when whitespace
    # follows it. Without that lookahead a one-line fence such as ```positive```
    # had its entire content consumed as the "tag" and came back empty.
    m = re.match(r"^```(?:\w+(?=\s))?\s*\n?(.*?)\n?```\s*$", s, re.DOTALL)
    return m.group(1).strip() if m else s
231
+
232
+
233
def apply_text_transform_with_ollama(
    text: str,
    transform_id: str,
    params: Optional[Dict[str, Any]] = None,
    *,
    effective: SanitizationOllamaEffective,
    model_override: Optional[str] = None,
) -> str:
    """
    Apply one catalog transform to `text` through Ollama and return the model's output.

    Args:
        text: input string; cut to `effective.max_input_chars` (when positive) with
            a "[TRUNCATED_FOR_LLM]" marker appended.
        transform_id: must be one of SANITIZATION_OLLAMA_TRANSFORM_IDS.
        params: per-transform options forwarded to the prompt builder.
        effective: resolved config from `resolve_sanitization_ollama_effective`
            (callers resolve it once per batch).
        model_override: non-blank value (e.g. Filter Lab compare) wins over the
            per-transform and default model.

    Returns:
        The reply with any surrounding code fence stripped. For "raw_to_sentiment"
        the first reply line is parsed as JSON and rendered as "label (0.71)" or
        just "label"; if parsing fails, the first 500 characters of the reply.

    Raises:
        ValueError: if `transform_id` is not an Ollama sanitization transform.
    """
    if transform_id not in SANITIZATION_OLLAMA_TRANSFORM_IDS:
        raise ValueError(f"transform_id {transform_id!r} is not handled by Ollama sanitization")

    # Model precedence: explicit override, then per-transform mapping, then default.
    override = (model_override or "").strip()
    chosen_model = override or effective.models.get(transform_id) or effective.default_model

    char_limit = effective.max_input_chars
    if char_limit > 0 and len(text) > char_limit:
        text = text[:char_limit] + "\n[TRUNCATED_FOR_LLM]"

    chat_messages = _build_messages(transform_id, text, dict(params or {}))
    reply = _ollama_chat(
        chat_messages,
        host=effective.host,
        model=chosen_model,
        timeout_sec=effective.timeout_sec,
        auto_pull=effective.auto_pull,
    )
    cleaned = _strip_fences(reply)

    if transform_id != "raw_to_sentiment":
        return cleaned

    try:
        first_line = cleaned.splitlines()[0].strip()
        verdict = json.loads(first_line)
        label = str(verdict.get("label", "unknown"))
        confidence = verdict.get("confidence")
        if isinstance(confidence, (int, float)):
            return f"{label} ({float(confidence):.2f})"
        return label
    except Exception:
        # Reply was empty or not the requested JSON: hand back a bounded slice of it.
        return cleaned[:500]
@@ -0,0 +1,89 @@
1
+ # Scope → canonical table resolution (Sprint 05 Stage 1)
2
+ # Maps MVP scope IDs from RPT to canonical table names. Reference: roles_scopes/MVP_TAXONOMY.md §4
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import List, Optional, Set
7
+
8
# Scope ID -> canonical table(s). One scope can map to one or more tables.
# Table names match MVP_TAXONOMY §4 (messages, ai_messages, events, ai_chat, activity, journal, Profile).
# Read and write variants of a scope map to the same tables; "*" is a wildcard
# that resolve_scopes_to_tables expands to ALL_CANONICAL_TABLES.
SCOPE_TO_TABLES: dict[str, list[str]] = {
    "messages:read": ["messages"],
    "messages:write": ["messages"],
    "aiMessages:read": ["ai_messages"],
    "events:read": ["events"],
    "events:write": ["events"],
    "aiChat:read": ["ai_chat"],
    "activity:read": ["activity"],
    "activity:write": ["activity"],
    "journal:read": ["journal"],
    "journal:write": ["journal"],
    "activitySummary:read": ["activity_summary"],
    "wellnessSummary:read": ["wellness_summary"],
    "publicBio:read": ["public_bio"],
    "contacts:resolve": ["contacts", "contact_identifiers"],
    "all:read": ["*"],
    "all:write": ["*"],
}
28
+
29
# All canonical table names (for "all" resolution). Order doesn't matter.
# Contains every concrete (non-"*") table named in SCOPE_TO_TABLES.
ALL_CANONICAL_TABLES: set[str] = {
    "messages",
    "ai_messages",
    "events",
    "ai_chat",
    "activity",
    "journal",
    "activity_summary",
    "wellness_summary",
    "public_bio",
    "contacts",
    "contact_identifiers",
}
43
+
44
+
45
def resolve_scopes_to_tables(scope_ids: Optional[List[str]]) -> Set[str]:
    """
    Translate MVP scope IDs into the set of canonical tables they grant.

    Blank entries and scopes missing from SCOPE_TO_TABLES are skipped. As soon as
    an "all:*" scope (or any scope mapped to "*") is seen, a copy of
    ALL_CANONICAL_TABLES is returned. None or an empty list yields an empty set.
    """
    granted: Set[str] = set()
    for raw_scope in scope_ids or ():
        scope = (raw_scope or "").strip()
        if not scope:
            continue
        if scope == "all:read" or scope == "all:write":
            return set(ALL_CANONICAL_TABLES)
        targets = SCOPE_TO_TABLES.get(scope)
        if targets is None:
            continue
        if "*" in targets:
            return set(ALL_CANONICAL_TABLES)
        granted.update(targets)
    return granted
66
+
67
+
68
def may_access_table(allowed_tables: Set[str], table_or_alias: str) -> bool:
    """
    Decide whether `allowed_tables` (from resolve_scopes_to_tables) permits `table_or_alias`.

    Access is granted when:
    - the allowed set contains the wildcard "*" or "all";
    - the name itself is in the allowed set; or
    - the name is an implementation table whose canonical table is allowed:
      "conversation_messages" -> "messages", and
      "ai_chat_messages" -> "ai_chat" or "ai_messages".

    An empty allowed set never grants access.
    """
    if not allowed_tables:
        return False
    if "*" in allowed_tables or "all" in allowed_tables:
        return True
    if table_or_alias in allowed_tables:
        return True
    # Implementation table -> canonical tables that unlock it. The former special
    # cases for "messages" and "contacts"/"contact_identifiers" are gone: they could
    # only be reached after the exact-match check failed, so they never granted access.
    implementation_aliases = {
        "conversation_messages": ("messages",),
        "ai_chat_messages": ("ai_chat", "ai_messages"),
    }
    canonical_names = implementation_aliases.get(table_or_alias, ())
    return any(name in allowed_tables for name in canonical_names)
@@ -0,0 +1 @@
1
+ """Service interfaces and adapters for Topos."""
@@ -0,0 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from ..config.settings import settings
6
+ from .embeddings.base import EmbeddingsService
7
+ from .embeddings.local import LocalEmbeddingsService
8
+ from .embeddings.remote import RemoteEmbeddingsService
9
+ from .interfaces import DbService, DeviceService, LLMService, SyncService
10
+ from .local import LocalDbService, LocalDeviceService, LocalSyncService
11
+ from .llm.openai import OpenAILLMService
12
+ from .postgres import HostedDeviceService, HostedSyncService, PostgresDbService
13
+
14
+
15
@dataclass(frozen=True)
class Services:
    """Immutable bundle of the service implementations returned by get_services()."""

    # Each field is typed by its interface; get_services() picks the concrete classes.
    db: DbService
    sync: SyncService
    device: DeviceService
    llm: LLMService
    embeddings: EmbeddingsService
22
+
23
+
24
# Lazily built, process-wide service bundle (see get_services).
_services: Services | None = None


def get_services() -> Services:
    """
    Return the shared Services bundle, building it on first call.

    `settings.topos_database_mode == "postgres"` selects the Postgres/hosted
    implementations with remote embeddings; any other mode selects the local
    ones. OpenAILLMService is used for `llm` in both cases.
    """
    global _services
    if _services is not None:
        return _services

    if settings.topos_database_mode == "postgres":
        bundle = Services(
            db=PostgresDbService(),
            sync=HostedSyncService(),
            device=HostedDeviceService(),
            llm=OpenAILLMService(),
            embeddings=RemoteEmbeddingsService(),
        )
    else:
        bundle = Services(
            db=LocalDbService(),
            sync=LocalSyncService(),
            device=LocalDeviceService(),
            llm=OpenAILLMService(),
            embeddings=LocalEmbeddingsService(),
        )
    _services = bundle
    return _services
@@ -0,0 +1 @@
1
+ """Embeddings service adapters."""
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Protocol
4
+
5
+
6
class EmbeddingsService(Protocol):
    """Structural interface for embedding providers: one async `embed` call per batch of texts."""

    # Takes a list of strings and returns a list of float vectors.
    # NOTE(review): presumably one vector per input text, in order — confirm with implementers.
    async def embed(self, texts: List[str]) -> List[List[float]]: ...
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+
5
+
6
class LocalEmbeddingsService:
    """Placeholder local embeddings backend; `embed` is not implemented yet."""

    async def embed(self, texts: List[str]) -> List[List[float]]:
        """Always raise NotImplementedError; `texts` is accepted only to satisfy the interface."""
        del texts  # intentionally unused
        raise NotImplementedError("Local embeddings not implemented yet")
@@ -0,0 +1,9 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+
5
+
6
class RemoteEmbeddingsService:
    """Placeholder remote embeddings backend; `embed` is not implemented yet."""

    async def embed(self, texts: List[str]) -> List[List[float]]:
        """Always raise NotImplementedError; `texts` is accepted only to satisfy the interface."""
        del texts  # intentionally unused
        raise NotImplementedError("Remote embeddings not implemented yet")
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, Optional, Protocol
4
+
5
+ from ..core.api_models import (
6
+ DeviceInfoResponse,
7
+ DeviceNameResponse,
8
+ PairDeviceResponse,
9
+ PairingCodeResponse,
10
+ StoreMessageResponse,
11
+ SyncDatabaseResponse,
12
+ SyncResponse,
13
+ )
14
+
15
+
16
class DbService(Protocol):
    """
    Structural interface for database operations.

    All methods are async. Several return response models from
    `core.api_models`; the rest return plain dicts or `Any`.
    """

    async def store_message(self, payload: Dict[str, Any]) -> StoreMessageResponse: ...
    # Paged reads: `dataset_id` may be None; `limit`/`offset` control paging.
    async def get_oplog(self, dataset_id: Optional[str], limit: int, offset: int) -> Dict[str, Any]: ...
    async def get_messages(self, dataset_id: Optional[str], limit: int, offset: int) -> Dict[str, Any]: ...
    async def replay_projection(self, dataset_id: Optional[str]) -> Dict[str, Any]: ...
    async def reset_database(self) -> Dict[str, Any]: ...
    async def sync_database(self) -> SyncDatabaseResponse: ...
    async def backup_database(self, encrypted: bool) -> Any: ...
    # NOTE(review): `file` is unannotated — presumably an uploaded file object; confirm against callers.
    async def restore_database(self, file, authenticated_user_id: str, encrypted: bool) -> Dict[str, Any]: ...
25
+
26
+
27
class SyncService(Protocol):
    """Structural interface for sync: a single async trigger returning a SyncResponse."""

    async def trigger_sync(self) -> SyncResponse: ...
29
+
30
+
31
class DeviceService(Protocol):
    """Structural interface for device pairing and identity; every method is async and returns an api_models response."""

    async def get_pairing_code(self) -> PairingCodeResponse: ...
    async def pair_device(self, pairing_code: str, keep_existing_data: bool) -> PairDeviceResponse: ...
    async def get_device_info(self) -> DeviceInfoResponse: ...
    async def set_device_name(self, device_name: str) -> DeviceNameResponse: ...
36
+
37
+
38
class LLMService(Protocol):
    """Structural interface for LLM access: text generation plus listing of Ollama models."""

    # Both calls take/return loosely typed dicts; the payload schema is defined by implementers.
    async def generate(self, payload: Dict[str, Any]) -> Dict[str, Any]: ...
    async def list_ollama_models(self) -> Dict[str, Any]: ...
@@ -0,0 +1 @@
1
+ """LLM service adapters."""
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Protocol
4
+
5
+
6
class LLMService(Protocol):
    """
    Minimal structural interface for an LLM backend: one async `generate` call.

    NOTE(review): `topos.services.interfaces` declares another `LLMService` with
    `Dict[str, Any]` payloads and an extra `list_ollama_models` method — confirm
    which of the two callers are meant to use.
    """

    async def generate(self, payload: Dict[str, str]) -> Dict[str, str]: ...
@@ -0,0 +1,126 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ import httpx
7
+ from fastapi import HTTPException, status
8
+
9
+ from ...core import state
10
+ from ...openai_client import OpenAIError
11
+ from ...config.settings import settings
12
+
13
+ logger = logging.getLogger("topos.services.llm")
14
+
15
+
16
def _normalize_provider(raw: Any) -> str:
    """
    Map a request's `provider` value to "openai" or "ollama".

    Strings are lower-cased and stripped before matching; anything else
    (None, non-strings, unknown names) falls back to "openai".
    """
    if isinstance(raw, str):
        candidate = raw.lower().strip()
        if candidate == "openai" or candidate == "ollama":
            return candidate
    return "openai"
24
+
25
+
26
async def _ollama_generate(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run a non-streaming completion against Ollama's /api/generate.

    Reads `prompt`, `model`, `max_tokens` and `temperature` from `payload`. A
    missing or blank model falls back to `settings.sanitization_ollama_default_model`;
    `max_tokens` is sent as the `num_predict` option.

    Returns:
        {"output": <stripped response text>, "model": <model reported or requested>, "usage": {}}

    Raises:
        HTTPException(502): if Ollama cannot be reached or answers with status >= 400.
    """
    prompt = payload.get("prompt") or ""
    requested_model = payload.get("model")
    model = requested_model.strip() if isinstance(requested_model, str) else ""
    if not model:
        model = settings.sanitization_ollama_default_model
    max_tokens = payload.get("max_tokens")
    temperature = payload.get("temperature")
    base = settings.engine_ollama_base_url.rstrip("/")

    request_body: Dict[str, Any] = {"model": model, "prompt": prompt, "stream": False}
    # Only options the caller actually supplied are forwarded.
    options: Dict[str, Any] = {
        option: value
        for option, value in (("num_predict", max_tokens), ("temperature", temperature))
        if value is not None
    }
    if options:
        request_body["options"] = options

    timeout = httpx.Timeout(settings.sanitization_ollama_timeout_sec, connect=10.0)
    logger.info(
        "Ollama generate: model=%r base=%s prompt_chars=%d max_tokens=%s temperature=%s",
        model,
        base,
        len(prompt),
        max_tokens,
        temperature,
    )
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            reply = await client.post(f"{base}/api/generate", json=request_body)
    except httpx.RequestError as exc:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Ollama unreachable at {base}: {exc}",
        ) from exc

    if reply.status_code >= 400:
        # Cap the echoed upstream body so error details stay bounded.
        snippet = (reply.text or str(reply.status_code))[:800]
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"Ollama error: {snippet}")

    data = reply.json()
    output_text = (data.get("response") or "").strip()
    reported_model = str(data.get("model") or model)
    logger.info(
        "Ollama generate complete: model=%r response_chars=%d",
        reported_model,
        len(output_text),
    )
    return {"output": output_text, "model": reported_model, "usage": {}}
73
+
74
+
75
async def _ollama_list_model_names() -> list[str]:
    """
    Fetch the model names Ollama reports at /api/tags.

    Returns:
        Sorted, de-duplicated names taken from the `name` field of each entry in `models`.

    Raises:
        HTTPException(502): if Ollama cannot be reached or answers with status >= 400.
    """
    base = settings.engine_ollama_base_url.rstrip("/")
    timeout = httpx.Timeout(settings.sanitization_ollama_timeout_sec, connect=10.0)
    logger.info("Ollama list models: base=%s", base)
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            reply = await client.get(f"{base}/api/tags")
    except httpx.RequestError as exc:
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Ollama unreachable at {base}: {exc}",
        ) from exc

    if reply.status_code >= 400:
        snippet = (reply.text or str(reply.status_code))[:800]
        raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail=f"Ollama error: {snippet}")

    data = reply.json()
    distinct_names = set()
    for entry in data.get("models", []):
        # Entries without a truthy name are skipped.
        if entry.get("name"):
            distinct_names.add(str(entry.get("name", "")).strip())
    unique = sorted(distinct_names)
    logger.info("Ollama list models complete: count=%d", len(unique))
    return unique
95
+
96
+
97
class OpenAILLMService:
    """LLM service that routes generation to OpenAI or Ollama and can list Ollama models."""

    @staticmethod
    def _require_llm_enabled() -> None:
        """Raise 403 unless `settings.enable_llm` is set and the engine mode is "full"."""
        llm_available = settings.enable_llm and state.get_engine_mode() == "full"
        if not llm_available:
            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="LLM is disabled")

    async def list_ollama_models(self) -> Dict[str, Any]:
        """Return {"models": [...]} with the names reported by Ollama; 403 when the LLM is disabled."""
        self._require_llm_enabled()
        model_names = await _ollama_list_model_names()
        return {"models": model_names}

    async def generate(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate text for `payload` with the provider it names (default "openai").

        Raises:
            HTTPException(403): the LLM is disabled.
            HTTPException(400): OpenAI was selected but no API key is configured.
            HTTPException(429): OpenAI reported "rate_limited".
            HTTPException(502): any other OpenAIError.
        """
        self._require_llm_enabled()

        if _normalize_provider(payload.get("provider")) == "ollama":
            logger.info("LLM generate routed to Ollama (model=%r)", payload.get("model"))
            return await _ollama_generate(payload)

        if not settings.openai_api_key:
            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="OPENAI_API_KEY is not set")

        try:
            completion = await state.openai_client.generate(
                prompt=payload.get("prompt", ""),
                max_tokens=payload.get("max_tokens"),
                temperature=payload.get("temperature"),
            )
            return {
                "output": completion["output"],
                "model": settings.openai_model,
                "usage": completion["usage"],
            }
        except OpenAIError as exc:
            # Upstream details are not echoed to the client; only the status class is mapped.
            if "rate_limited" in str(exc):
                raise HTTPException(status_code=status.HTTP_429_TOO_MANY_REQUESTS, detail="LLM rate limited") from exc
            raise HTTPException(status_code=status.HTTP_502_BAD_GATEWAY, detail="LLM upstream error") from exc