topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249) hide show
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,22 @@
1
+ """Canonical mapper registry."""
2
+
3
+ from .base import CanonicalMapper
4
+ from .chatgpt_mapper import ChatGPTCanonicalMapper
5
+ from .grok_mapper import GrokCanonicalMapper
6
+ from .messenger_mapper import ImessageCanonicalMapper, SignalCanonicalMapper
7
+
8
# Lookup table from source id to the canonical mapper class registered for it.
MAPPER_REGISTRY = dict(
    chatgpt=ChatGPTCanonicalMapper,
    grok=GrokCanonicalMapper,
    # Sprint 02: conversations canonical group
    imessage=ImessageCanonicalMapper,
    signal=SignalCanonicalMapper,
)

# Public names exported by this package.
__all__ = [
    "CanonicalMapper",
    "ChatGPTCanonicalMapper",
    "GrokCanonicalMapper",
    "ImessageCanonicalMapper",
    "SignalCanonicalMapper",
    "MAPPER_REGISTRY",
]
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+ from ...ingestion.parsers.base import NormalizedRecord
7
+
8
+
9
@dataclass(frozen=True)
class CanonicalRecord:
    """Immutable pairing of a record identifier with its canonical payload."""

    # Identifier of the canonical record.
    record_id: str
    # Canonical field values keyed by field name. Annotated as str -> str;
    # NOTE(review): mappers in this package also store non-string values
    # (e.g. metadata dicts) here — confirm the intended value type.
    payload: Dict[str, str]
13
+
14
+
15
@dataclass(frozen=True)
class MappingMetadata:
    """Immutable description of which mapper produced a canonical record."""

    # Source the mapping applies to (mappers in this package pass e.g. "chatgpt").
    source_id: str
    # Version label of the mapping logic (mappers in this package pass "v1").
    mapping_version: str
19
+
20
+
21
class CanonicalMapper:
    """Base interface for mappers from normalized records to canonical ones.

    Both methods raise ``NotImplementedError`` here; concrete mappers
    override them.
    """

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return the canonical record for *normalized* (must be overridden)."""
        raise NotImplementedError()

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the mapping metadata for *normalized* (must be overridden)."""
        raise NotImplementedError()
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+
6
+ from ...ingestion.parsers.base import NormalizedRecord
7
+ from ..models import CanonicalMessage
8
+ from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
9
+
10
+
11
@dataclass
class ChatGPTCanonicalMapper(CanonicalMapper):
    """Maps a normalized ChatGPT record onto a ``CanonicalMessage`` payload."""

    # Mapper version; recorded in the message metadata and in MappingMetadata.
    version: str = "v1"

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Build the canonical record for one normalized ChatGPT message.

        Args:
            normalized: Record whose ``payload`` mapping supplies the message
                fields and whose ``record_id`` is the fallback message id.

        Returns:
            A ``CanonicalRecord`` keyed by the message id whose payload holds
            the ``CanonicalMessage`` fields.
        """
        payload = normalized.payload
        # ``get("content", "")`` still yields None when the key is present
        # with a None value, which would crash on ``.encode`` below.
        content = payload.get("content") or ""
        content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
        message_id = payload.get("message_id", normalized.record_id)

        # Preserve _metadata for conversation tree reconstruction.
        metadata = {"mapper_version": self.version}
        extra_metadata = payload.get("_metadata")
        if extra_metadata:
            # Merge _metadata into metadata (preserves parent_id, node_id, etc.).
            # A missing, None, or empty _metadata is skipped instead of
            # raising inside ``dict.update``.
            metadata.update(extra_metadata)

        canonical = CanonicalMessage(
            message_id=message_id,
            conversation_id=payload.get("thread_id", ""),
            sender_type=payload.get("sender_type", ""),
            content=content,
            ts=payload.get("ts"),
            source_id="chatgpt",
            content_hash=content_hash,
            metadata=metadata,
        )
        return CanonicalRecord(record_id=canonical.message_id, payload=canonical.__dict__)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the source id ("chatgpt") and this mapper's version."""
        return MappingMetadata(source_id="chatgpt", mapping_version=self.version)
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ from ...ingestion.parsers.base import NormalizedRecord
6
+ from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
7
+
8
+
9
@dataclass
class GrokCanonicalMapper(CanonicalMapper):
    """Pass-through mapper: the normalized Grok payload is used unchanged."""

    # Mapper version reported through ``mapping_metadata``.
    version: str = "v1"

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Wrap the normalized record's id and payload in a ``CanonicalRecord``."""
        record_id = normalized.record_id
        passthrough_payload = normalized.payload
        return CanonicalRecord(record_id=record_id, payload=passthrough_payload)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the source id ("grok") and this mapper's version."""
        mapping_version = self.version
        return MappingMetadata(source_id="grok", mapping_version=mapping_version)
@@ -0,0 +1,58 @@
1
+ """Canonical mappers for messenger sources (imessage, signal).
2
+
3
+ Produce the shape expected by conversation_messages. The ingestion pipeline
4
+ routes canonical_group_id=conversations to ConversationsTablesManager and
5
+ builds staging records from normalized payloads directly; these mappers
6
+ are for registry completeness and any code that looks up imessage/signal by mapper_id.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+
13
+ from ...ingestion.parsers.base import NormalizedRecord
14
+ from .base import CanonicalMapper, CanonicalRecord, MappingMetadata
15
+
16
+
17
def _normalized_to_canonical_payload(normalized: NormalizedRecord, source_id: str) -> dict:
    """Build the canonical message dict for a normalized messenger record.

    The message id falls back to the record id, the conversation id is taken
    from ``thread_id`` and then ``conversation_id`` (empty string if neither
    is truthy), and ``_metadata`` is copied only when the key is present.
    """
    source = normalized.payload
    conversation_id = source.get("thread_id") or source.get("conversation_id") or ""

    canonical = {
        "message_id": source.get("message_id", normalized.record_id),
        "conversation_id": conversation_id,
        "sender_type": source.get("sender_type", "human"),
    }
    # These fields have no default: a missing key yields None.
    for optional_key in ("sender_id", "reply_to_message_id", "message_type", "event_type"):
        canonical[optional_key] = source.get(optional_key)
    canonical["content"] = source.get("content", "")
    canonical["ts"] = source.get("ts", "")
    canonical["source_id"] = source_id

    if "_metadata" in source:
        canonical["_metadata"] = source["_metadata"]
    return canonical
35
+
36
+
37
@dataclass
class ImessageCanonicalMapper(CanonicalMapper):
    """Maps normalized iMessage record to canonical shape for conversation_messages."""

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return a canonical record keyed by the payload's ``message_id``."""
        canonical_payload = _normalized_to_canonical_payload(normalized, "imessage")
        message_id = canonical_payload["message_id"]
        return CanonicalRecord(record_id=message_id, payload=canonical_payload)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the fixed mapping metadata: source "imessage", version "v1"."""
        return MappingMetadata(mapping_version="v1", source_id="imessage")
47
+
48
+
49
@dataclass
class SignalCanonicalMapper(CanonicalMapper):
    """Maps normalized Signal record to canonical shape for conversation_messages."""

    def map(self, normalized: NormalizedRecord) -> CanonicalRecord:
        """Return a canonical record keyed by the payload's ``message_id``."""
        canonical_payload = _normalized_to_canonical_payload(normalized, "signal")
        message_id = canonical_payload["message_id"]
        return CanonicalRecord(record_id=message_id, payload=canonical_payload)

    def mapping_metadata(self, normalized: NormalizedRecord) -> MappingMetadata:
        """Return the fixed mapping metadata: source "signal", version "v1"."""
        return MappingMetadata(mapping_version="v1", source_id="signal")
@@ -0,0 +1,31 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Optional
5
+
6
+
7
@dataclass(frozen=True)
class CanonicalConversation:
    """Immutable canonical description of a conversation."""

    # Identifier of the conversation.
    conversation_id: str
    # Identifier of the source the conversation came from.
    source_id: str
    # Additional key/value attributes attached to the conversation.
    metadata: Dict[str, str]
12
+
13
+
14
@dataclass(frozen=True)
class CanonicalParticipant:
    """Immutable canonical description of a participant in a conversation."""

    # Identifier of the participant.
    participant_id: str
    # Identifier of the conversation the participant belongs to.
    conversation_id: str
    # Role label of the participant (allowed values are not defined here).
    role: str
    # Additional key/value attributes attached to the participant.
    metadata: Dict[str, str]
20
+
21
+
22
@dataclass(frozen=True)
class CanonicalMessage:
    """Immutable canonical representation of a single message."""

    # Identifier of the message.
    message_id: str
    # Identifier of the conversation the message belongs to.
    conversation_id: str
    # Kind of sender (allowed values are not defined here).
    sender_type: str
    # Message text.
    content: str
    # Timestamp as a string, or None when unknown (format not defined here).
    ts: Optional[str]
    # Identifier of the source the message came from.
    source_id: str
    # Hash of the content; presumably a SHA-256 hex digest — verify against
    # the mapper that populates it.
    content_hash: str
    # Additional key/value attributes attached to the message.
    metadata: Dict[str, str]
@@ -0,0 +1,23 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+ from typing import Dict
6
+
7
+ from .mappers.base import CanonicalRecord
8
+
9
+
10
def deterministic_id(namespace: str, value: str) -> str:
    """Return the SHA-256 hex digest of ``"<namespace>:<value>"`` (UTF-8 encoded)."""
    hasher = hashlib.sha256()
    hasher.update(f"{namespace}:{value}".encode("utf-8"))
    return hasher.hexdigest()
13
+
14
+
15
@dataclass
class CanonicalResolver:
    """Resolves canonical records and handles collisions (stub)."""

    def resolve_message(self, payload: Dict[str, str]) -> CanonicalRecord:
        """Wrap *payload* in a record whose id is derived from source and content.

        The id is ``deterministic_id(source_id, content)``, with ``"unknown"``
        and ``""`` used when the respective keys are missing.
        """
        text = payload.get("content", "")
        origin = payload.get("source_id", "unknown")
        return CanonicalRecord(record_id=deterministic_id(origin, text), payload=payload)
topos/cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """CLI package for Topos."""
topos/cli/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from .commands import main
4
+
5
# Invoke the CLI entry point when this module is executed as a script.
if __name__ == "__main__":
    main()
topos/cli/commands.py ADDED
@@ -0,0 +1,132 @@
1
+ """CLI entry points for Topos."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ import click
10
+ import uvicorn
11
+
12
+ # Add parent directory to path for imports
13
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
14
+
15
+ from topos.storage.db.paths import discover_databases
16
+
17
# Per-user env file: ~/.topos/.env
USER_ENV_PATH = Path.home().joinpath(".topos", ".env")
# Legacy env file: ".env" in the directory two levels above this module.
LEGACY_ENV_PATH = Path(__file__).resolve().parent.parent.joinpath(".env")
19
+
20
+
21
def _load_env_file(env_path: Path) -> None:
    """Load ``KEY=value`` pairs from *env_path* into ``os.environ``.

    Does nothing when the file is missing. Blank lines, ``#`` comment lines
    and lines without ``=`` are skipped. Surrounding whitespace and quote
    characters are stripped from values. Variables that are already set in
    the environment are left untouched.
    """
    if env_path.exists():
        for raw_line in env_path.read_text(encoding="utf-8").splitlines():
            entry = raw_line.strip()
            if entry == "" or entry.startswith("#"):
                continue
            name, separator, raw_value = entry.partition("=")
            if not separator:
                continue
            name = name.strip()
            cleaned_value = raw_value.strip().strip('"').strip("'")
            if name:
                # setdefault keeps any value already present in the environment.
                os.environ.setdefault(name, cleaned_value)
33
+
34
+
35
def _save_topos_key(topos_key: str, env_path: Path = USER_ENV_PATH) -> Path:
    """Persist ``TOPOS_KEY`` in an env file and return the file's path.

    Every existing ``TOPOS_KEY`` assignment is replaced in place; all other
    lines are kept. When no assignment exists, one is appended (preceded by
    a blank line if the file does not already end with one).

    Args:
        topos_key: Key value to store.
        env_path: Env file to create or update.

    Returns:
        The path that was written.
    """
    env_path.parent.mkdir(parents=True, exist_ok=True)
    lines: list[str] = []
    if env_path.exists():
        lines = env_path.read_text(encoding="utf-8").splitlines()

    entry = f"TOPOS_KEY={topos_key}"
    updated = False
    new_lines: list[str] = []
    for line in lines:
        stripped = line.strip()
        # Match the key the same way _load_env_file parses it (text before
        # the first "=", stripped), so "TOPOS_KEY = old" is replaced too.
        # Otherwise a second entry would be appended and the loader, which
        # keeps the first value it sees, would ignore the new key.
        name, separator, _ = stripped.partition("=")
        if separator and name.strip() == "TOPOS_KEY":
            new_lines.append(entry)
            updated = True
        else:
            new_lines.append(line)

    if not updated:
        if new_lines and new_lines[-1].strip():
            new_lines.append("")
        new_lines.append(entry)

    # Create the file with owner-only permissions from the start, so the key
    # is not briefly readable by others between the write and the chmod.
    fd = os.open(env_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as handle:
        handle.write("\n".join(new_lines) + "\n")
    try:
        # Also tighten a pre-existing file that had wider permissions.
        env_path.chmod(0o600)
    except OSError:
        # Best-effort permissions (e.g. may fail on some filesystems).
        pass
    return env_path
63
+
64
+
65
@click.command()
@click.option(
    "--db-path",
    help="Database file path (SQLite). If not specified, uses auto-discovery.",
)
@click.option(
    "--topos-key",
    help="Topos key for this run (overrides saved key).",
)
@click.option(
    "--set-topos-key",
    metavar="KEY",
    help="Save TOPOS_KEY to ~/.topos/.env and exit.",
)
@click.option(
    "--discover",
    is_flag=True,
    help="Show discovered databases and exit",
)
@click.option(
    "--port",
    default=9000,
    help="Server port (default: 9000)",
)
@click.option(
    "--host",
    default="0.0.0.0",
    help="Host to bind to (default: 0.0.0.0)",
)
def main(db_path, topos_key, set_topos_key, discover, port, host) -> None:
    """Topos Control Plane API entry point."""
    # The docstring above doubles as the click help text, so it is kept as is.

    # --set-topos-key: persist the key and exit without starting the server.
    if set_topos_key:
        env_path = _save_topos_key(set_topos_key)
        click.echo(f"Saved TOPOS_KEY to {env_path}")
        click.echo("You can now run: topos-node")
        return

    # --discover: list the databases found and exit.
    if discover:
        databases = discover_databases()
        if databases:
            click.echo("Discovered databases:")
            for db in databases:
                click.echo(f"  - {db}")
        else:
            click.echo("No existing databases found")
        return

    # Env files never override variables already set; the user file is
    # loaded first, so it wins over the legacy file.
    _load_env_file(USER_ENV_PATH)
    _load_env_file(LEGACY_ENV_PATH)

    if topos_key:
        os.environ["TOPOS_KEY"] = topos_key
    elif not os.getenv("TOPOS_KEY"):
        os.environ["TOPOS_KEY"] = "dev-key"
        click.echo("TOPOS_KEY not set; using local dev key")

    # Export the database path BEFORE importing the app, so configuration
    # read at import time also sees --db-path.
    # NOTE(review): topos.app is not visible here; if it only reads the
    # environment lazily this ordering is equivalent to the previous one.
    if db_path:
        os.environ["TOPOS_DATABASE_PATH"] = db_path
        click.echo(f"Database path: {db_path}")

    # Imported late so the environment prepared above is in place.
    from topos.app import app

    click.echo(f"Starting topos API on {host}:{port}")
    uvicorn.run(app, host=host, port=port)
129
+
130
+
131
# Run the click command when this module is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1 @@
1
+ """Configuration package for Topos."""
@@ -0,0 +1,189 @@
1
+ """
2
+ Sanitization LLM (Ollama) configuration: file/env defaults + device DB overrides.
3
+
4
+ Device overrides are stored in SQLite `engine_config` under
5
+ `ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE` as JSON (see DeviceSanitizationOllamaOverrides).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import sqlite3
13
+ from typing import TYPE_CHECKING, Any, Dict, Optional
14
+
15
+ from pydantic import BaseModel, ConfigDict, Field
16
+
17
+ if TYPE_CHECKING:
18
+ from topos.config.settings import Settings
19
+
20
+ logger = logging.getLogger("topos.config.sanitization_ollama")
21
+
22
# Transform IDs that use Ollama in topos.sanitization.ollama_transforms.
# The resolver in this module produces one effective model per id listed here.
SANITIZATION_OLLAMA_TRANSFORM_IDS: tuple[str, ...] = (
    "pii_redaction",
    "nsfw_sanitization",
    "raw_to_summary",
    "raw_to_sentiment",
    "third_party_anonymization",
    "name_removal",
    "contact_removal",
)

# engine_config key under which the device override JSON is stored.
ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE = "sanitization_ollama_device"
34
+
35
+
36
class DeviceSanitizationOllamaOverrides(BaseModel):
    """Partial overrides stored on device (engine_config JSON). Omitted keys keep file/env defaults."""

    # Unknown keys in the stored JSON are ignored rather than rejected.
    model_config = ConfigDict(extra="ignore")

    # Version number of the stored override document; must be >= 1.
    version: int = Field(1, ge=1)
    # For every field below, None means "no override".
    enabled: Optional[bool] = None
    host: Optional[str] = None
    default_model: Optional[str] = None
    timeout_sec: Optional[float] = None
    max_input_chars: Optional[int] = None
    # Per-transform model names keyed by transform id.
    models: Optional[Dict[str, str]] = None
48
+
49
+
50
class SanitizationOllamaEffective(BaseModel):
    """Fully resolved config used at runtime (after merge)."""

    # Whether the feature is enabled.
    enabled: bool
    # Ollama host URL.
    host: str
    # Default model name.
    default_model: str
    # Timeout in seconds.
    timeout_sec: float
    # Auto-pull flag (not documented further here).
    auto_pull: bool
    # Limit on input characters.
    max_input_chars: int
    # Model name keyed by transform id.
    models: Dict[str, str]
60
+
61
+
62
def _settings_transform_model_map(settings: Any) -> Dict[str, Optional[str]]:
    """Map transform_id -> optional per-transform model from Settings.

    Each value is read from the ``sanitization_ollama_model_<transform_id>``
    attribute of *settings*; a missing attribute yields ``None``.
    """
    transform_ids = (
        "pii_redaction",
        "nsfw_sanitization",
        "raw_to_summary",
        "raw_to_sentiment",
        "third_party_anonymization",
        "name_removal",
        "contact_removal",
    )
    return {
        transform_id: getattr(settings, "sanitization_ollama_model_" + transform_id, None)
        for transform_id in transform_ids
    }
73
+
74
+
75
def _read_engine_config_value(conn: sqlite3.Connection, key: str) -> Optional[str]:
    """Read engine_config without importing topos.core.state (avoids circular imports).

    Args:
        conn: Open SQLite connection; works with the default tuple rows and
            with ``sqlite3.Row`` rows.
        key: ``engine_config.key`` to look up.

    Returns:
        The stored value as a string, or ``None`` when the row is missing,
        the stored value is NULL, or the read fails (failures are logged at
        debug level and never raised).
    """
    try:
        row = conn.execute("SELECT value FROM engine_config WHERE key = ?", (key,)).fetchone()
        if not row:
            return None
        value = row["value"] if isinstance(row, sqlite3.Row) else row[0]
        # A NULL column must stay None; str(None) would produce the literal
        # text "None", which callers would then try to parse as JSON.
        if value is None:
            return None
        return str(value)
    except Exception as exc:  # noqa: BLE001
        # Deliberately best-effort: a missing table or odd row factory must
        # not break config resolution.
        logger.debug("engine_config read failed for %s: %s", key, exc)
        return None
85
+
86
+
87
def parse_device_overrides_json(raw: Optional[str]) -> DeviceSanitizationOllamaOverrides:
    """Parse the stored override JSON, falling back to empty overrides.

    Empty/blank input, JSON that is not an object, and any parse or
    validation error all yield a default ``DeviceSanitizationOllamaOverrides``.
    Errors are logged as warnings and never raised.
    """
    if raw and str(raw).strip():
        try:
            decoded = json.loads(raw)
            if isinstance(decoded, dict):
                return DeviceSanitizationOllamaOverrides.model_validate(decoded)
        except Exception as exc:  # noqa: BLE001
            logger.warning("Invalid sanitization_ollama device JSON, ignoring: %s", exc)
    return DeviceSanitizationOllamaOverrides()
98
+
99
+
100
def _stripped_or_empty(value: Any) -> str:
    """Return *value* as a stripped string, or "" when it is None/empty/blank."""
    if not value:
        return ""
    return str(value).strip()


def resolve_sanitization_ollama_effective(
    settings: Settings,
    conn: Optional[sqlite3.Connection],
) -> SanitizationOllamaEffective:
    """Merge Settings defaults with device overrides into the runtime config.

    Args:
        settings: File/env settings providing the defaults.
        conn: SQLite connection used to read the device overrides from
            ``engine_config``; ``None`` means no overrides are applied.

    Returns:
        The fully resolved configuration. For each value the device override
        wins when set, then the Settings value, then a built-in fallback
        (host ``http://127.0.0.1:11434``, model ``llama3.2``).
    """
    device = DeviceSanitizationOllamaOverrides()
    if conn is not None:
        raw = _read_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE)
        device = parse_device_overrides_json(raw)

    enabled = settings.sanitization_ollama_enabled
    if device.enabled is not None:
        enabled = device.enabled

    # Each candidate is stripped BEFORE falling through to the next one, so
    # a whitespace-only override no longer shadows the Settings value (or
    # produces an empty model name).
    host = (
        _stripped_or_empty(device.host)
        or _stripped_or_empty(settings.sanitization_ollama_host)
        or _stripped_or_empty(settings.engine_ollama_base_url)
        or "http://127.0.0.1:11434"
    )
    default_model = (
        _stripped_or_empty(device.default_model)
        or _stripped_or_empty(settings.sanitization_ollama_default_model)
        or "llama3.2"
    )

    timeout_sec = float(device.timeout_sec if device.timeout_sec is not None else settings.sanitization_ollama_timeout_sec)
    # auto_pull has no device override; a Settings object without the
    # attribute defaults to True.
    auto_pull = bool(getattr(settings, "sanitization_ollama_auto_pull", True))
    max_input_chars = int(
        device.max_input_chars if device.max_input_chars is not None else settings.sanitization_ollama_max_input_chars
    )

    # Per-transform model: device override, then Settings, then default model.
    st_map = _settings_transform_model_map(settings)
    device_models = device.models or {}
    models: Dict[str, str] = {
        tid: _stripped_or_empty(device_models.get(tid)) or _stripped_or_empty(st_map.get(tid)) or default_model
        for tid in SANITIZATION_OLLAMA_TRANSFORM_IDS
    }

    return SanitizationOllamaEffective(
        enabled=enabled,
        host=host,
        default_model=default_model,
        timeout_sec=timeout_sec,
        auto_pull=auto_pull,
        max_input_chars=max_input_chars,
        models=models,
    )
148
+
149
+
150
def effective_config_for_api(settings: Settings, conn: Optional[sqlite3.Connection]) -> dict[str, Any]:
    """Payload for GET /v1/sanitization-ollama-config (no secrets).

    Args:
        settings: File/env settings providing the defaults.
        conn: SQLite connection used to read device overrides, or ``None``.

    Returns:
        A dict with the known transform ids, the Settings defaults, the
        stored device overrides (empty when there is no connection), and the
        merged effective configuration.
    """
    eff = resolve_sanitization_ollama_effective(settings, conn)
    device_obj: Optional[DeviceSanitizationOllamaOverrides] = None
    if conn is not None:
        device_raw = _read_engine_config_value(conn, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE)
        device_obj = parse_device_overrides_json(device_raw)

    defaults = {
        "enabled": settings.sanitization_ollama_enabled,
        "host": settings.sanitization_ollama_host or settings.engine_ollama_base_url,
        "default_model": settings.sanitization_ollama_default_model,
        "timeout_sec": settings.sanitization_ollama_timeout_sec,
        # Read the same way the resolver does, so a Settings object without
        # this attribute reports True instead of raising AttributeError.
        "auto_pull": getattr(settings, "sanitization_ollama_auto_pull", True),
        "max_input_chars": settings.sanitization_ollama_max_input_chars,
        # Only transforms with a per-transform model configured are listed.
        "models": {k: v for k, v in _settings_transform_model_map(settings).items() if v},
    }
    return {
        "transform_ids": list(SANITIZATION_OLLAMA_TRANSFORM_IDS),
        "defaults_from_settings": defaults,
        "device_overrides": device_obj.model_dump(exclude_none=True) if device_obj else {},
        "effective": eff.model_dump(),
    }
174
+
175
+
176
def normalize_put_device_overrides(body: dict[str, Any]) -> str:
    """Validate and return JSON string for engine_config.

    Raises:
        ValueError: if ``device_overrides`` is missing or not an object, if
            ``models`` names an unknown transform id, or if a model name is
            blank. Validation errors from the override model propagate.
    """
    overrides = body.get("device_overrides")
    if overrides is None:
        raise ValueError("device_overrides is required")
    if not isinstance(overrides, dict):
        raise ValueError("device_overrides must be an object")

    validated = DeviceSanitizationOllamaOverrides.model_validate(overrides)
    model_overrides = validated.models if validated.models else {}
    for tid in model_overrides:
        if tid not in SANITIZATION_OLLAMA_TRANSFORM_IDS:
            raise ValueError(f"Unknown transform_id in models: {tid!r}")
        if str(model_overrides[tid]).strip() == "":
            raise ValueError(f"Empty model for transform_id {tid!r}")

    serializable = validated.model_dump(exclude_none=True)
    return json.dumps(serializable)