topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. shared/__init__.py +59 -0
  2. shared/filtering.py +640 -0
  3. shared/schema_registry.py +229 -0
  4. topos/__init__.py +5 -0
  5. topos/__version__.py +6 -0
  6. topos/analytics/__init__.py +15 -0
  7. topos/analytics/duckdb_adapter.py +48 -0
  8. topos/analytics/messenger_communities.py +349 -0
  9. topos/analytics/messenger_graph.py +522 -0
  10. topos/analytics/messenger_labels.py +321 -0
  11. topos/analytics/profiles.py +22 -0
  12. topos/analytics/query_engine.py +64 -0
  13. topos/analytics/raw_queries.py +174 -0
  14. topos/api/__init__.py +1 -0
  15. topos/api/analytics.py +52 -0
  16. topos/api/app_registry.py +31 -0
  17. topos/api/backup.py +15 -0
  18. topos/api/compute_remote.py +175 -0
  19. topos/api/data_commit.py +158 -0
  20. topos/api/data_explorer_table_prefs.py +81 -0
  21. topos/api/db.py +10 -0
  22. topos/api/device.py +25 -0
  23. topos/api/enrichment.py +959 -0
  24. topos/api/filter_lab.py +195 -0
  25. topos/api/health.py +61 -0
  26. topos/api/ingestion_api.py +37 -0
  27. topos/api/ingestion_compat.py +21 -0
  28. topos/api/ingestion_sources.py +600 -0
  29. topos/api/llm.py +76 -0
  30. topos/api/local_mcp.py +46 -0
  31. topos/api/messenger_analytics.py +385 -0
  32. topos/api/query_api.py +13 -0
  33. topos/api/sanitization_ollama_config.py +64 -0
  34. topos/api/source_install.py +324 -0
  35. topos/api/sources.py +13 -0
  36. topos/api/sync.py +10 -0
  37. topos/api/ui_config.py +83 -0
  38. topos/api/uma_data.py +311 -0
  39. topos/api/usage.py +49 -0
  40. topos/api/user_identity.py +46 -0
  41. topos/app.py +239 -0
  42. topos/auth.py +17 -0
  43. topos/canonicalization/__init__.py +1 -0
  44. topos/canonicalization/mappers/__init__.py +22 -0
  45. topos/canonicalization/mappers/base.py +26 -0
  46. topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
  47. topos/canonicalization/mappers/grok_mapper.py +17 -0
  48. topos/canonicalization/mappers/messenger_mapper.py +58 -0
  49. topos/canonicalization/models.py +31 -0
  50. topos/canonicalization/resolver.py +23 -0
  51. topos/cli/__init__.py +1 -0
  52. topos/cli/__main__.py +6 -0
  53. topos/cli/commands.py +132 -0
  54. topos/config/__init__.py +1 -0
  55. topos/config/sanitization_ollama.py +189 -0
  56. topos/config/settings.py +310 -0
  57. topos/contacts/__init__.py +5 -0
  58. topos/contacts/identity.py +24 -0
  59. topos/control_plane_client.py +300 -0
  60. topos/core/__init__.py +1 -0
  61. topos/core/api_models.py +128 -0
  62. topos/core/connection_resilience.py +99 -0
  63. topos/core/device_helpers.py +8 -0
  64. topos/core/errors.py +13 -0
  65. topos/core/events.py +12 -0
  66. topos/core/handlers.py +5625 -0
  67. topos/core/logging.py +175 -0
  68. topos/core/metrics.py +21 -0
  69. topos/core/startup_banner.py +62 -0
  70. topos/core/state.py +682 -0
  71. topos/core/table_layers.py +45 -0
  72. topos/core/types.py +13 -0
  73. topos/data_explorer_table_prefs.py +150 -0
  74. topos/engine/__init__.py +29 -0
  75. topos/engine/backends/__init__.py +50 -0
  76. topos/engine/backends/base.py +21 -0
  77. topos/engine/backends/huggingface.py +151 -0
  78. topos/engine/backends/ollama.py +181 -0
  79. topos/engine/backends/stub.py +22 -0
  80. topos/engine/engine.py +165 -0
  81. topos/engine/intake.py +32 -0
  82. topos/engine/queue_manager.py +112 -0
  83. topos/engine/registration.py +126 -0
  84. topos/engine/result_formatter.py +38 -0
  85. topos/engine/router.py +19 -0
  86. topos/engine/scoped_token.py +82 -0
  87. topos/engine/tasks.py +154 -0
  88. topos/engine/transport.py +44 -0
  89. topos/engine/usage_guard.py +100 -0
  90. topos/engine/usage_observation.py +129 -0
  91. topos/engine/validator.py +23 -0
  92. topos/enrichment/__init__.py +1 -0
  93. topos/enrichment/derived_tables.py +214 -0
  94. topos/enrichment/jobs/__init__.py +30 -0
  95. topos/enrichment/jobs/base.py +54 -0
  96. topos/enrichment/jobs/canonical/__init__.py +1 -0
  97. topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
  98. topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
  99. topos/enrichment/jobs/canonical/entities_job.py +27 -0
  100. topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
  101. topos/enrichment/jobs/canonical/topics_job.py +27 -0
  102. topos/enrichment/jobs/raw/__init__.py +1 -0
  103. topos/enrichment/jobs/raw/attachments_job.py +12 -0
  104. topos/enrichment/jobs/raw/language_job.py +12 -0
  105. topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
  106. topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
  107. topos/enrichment/models/__init__.py +1 -0
  108. topos/enrichment/models/manager.py +8 -0
  109. topos/enrichment/models/registry.py +71 -0
  110. topos/enrichment/models/versioning.py +8 -0
  111. topos/enrichment/orchestrator.py +177 -0
  112. topos/enrichment/processor.py +17 -0
  113. topos/enrichment/progress_bar.py +122 -0
  114. topos/enrichment/website_classifier.py +31 -0
  115. topos/filter_lab/__init__.py +1 -0
  116. topos/filter_lab/bundles.py +300 -0
  117. topos/filter_lab/schema.py +86 -0
  118. topos/filter_lab/service.py +167 -0
  119. topos/filter_lab/store.py +374 -0
  120. topos/filter_lab/worker.py +250 -0
  121. topos/hosted_pool_lease.py +153 -0
  122. topos/ingestion/__init__.py +1 -0
  123. topos/ingestion/checkpoints/__init__.py +6 -0
  124. topos/ingestion/checkpoints/checkpoint_store.py +24 -0
  125. topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
  126. topos/ingestion/ingest_helpers.py +504 -0
  127. topos/ingestion/jobs.py +91 -0
  128. topos/ingestion/local_sync.py +823 -0
  129. topos/ingestion/log_preview.py +21 -0
  130. topos/ingestion/manager.py +1100 -0
  131. topos/ingestion/parser.py +174 -0
  132. topos/ingestion/parsers/__init__.py +32 -0
  133. topos/ingestion/parsers/base.py +24 -0
  134. topos/ingestion/parsers/browser_parser.py +171 -0
  135. topos/ingestion/parsers/calendar_parser.py +21 -0
  136. topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
  137. topos/ingestion/parsers/chatgpt_parser.py +67 -0
  138. topos/ingestion/parsers/grok_parser.py +21 -0
  139. topos/ingestion/parsers/messenger_parser.py +97 -0
  140. topos/ingestion/progress.py +54 -0
  141. topos/ingestion/sources/__init__.py +20 -0
  142. topos/ingestion/sources/base.py +39 -0
  143. topos/ingestion/sources/calendar.py +29 -0
  144. topos/ingestion/sources/chatgpt.py +29 -0
  145. topos/ingestion/sources/contact_importers.py +274 -0
  146. topos/ingestion/sources/grok.py +29 -0
  147. topos/ingestion/sources/imessage_reader.py +479 -0
  148. topos/ingestion/sources/signal_export_parser.py +132 -0
  149. topos/ingestion/sources/signal_reader.py +491 -0
  150. topos/ingestion/state_machine.py +70 -0
  151. topos/ingestion/triggers/__init__.py +1 -0
  152. topos/ingestion/triggers/file_trigger.py +36 -0
  153. topos/ingestion/triggers/sqlite_trigger.py +18 -0
  154. topos/ingestion/validation/__init__.py +1 -0
  155. topos/ingestion/validation/base.py +27 -0
  156. topos/ingestion/validation/schema_registry.py +111 -0
  157. topos/ingestion/validation/schema_validator.py +13 -0
  158. topos/lineage/__init__.py +1 -0
  159. topos/lineage/provenance.py +9 -0
  160. topos/lineage/tracker.py +9 -0
  161. topos/mcp_stdio_proxy.py +83 -0
  162. topos/observability/__init__.py +1 -0
  163. topos/observability/alerts.py +7 -0
  164. topos/observability/metrics.py +25 -0
  165. topos/observability/tracing.py +18 -0
  166. topos/openai_client.py +69 -0
  167. topos/projections/__init__.py +1 -0
  168. topos/projections/vector_index/__init__.py +1 -0
  169. topos/projections/vector_index/base.py +21 -0
  170. topos/projections/vector_index/builders.py +11 -0
  171. topos/projections/vector_index/health_checks.py +5 -0
  172. topos/rate_limit.py +43 -0
  173. topos/sanitization/__init__.py +16 -0
  174. topos/sanitization/ollama_transforms.py +276 -0
  175. topos/scope_resolution.py +89 -0
  176. topos/services/__init__.py +1 -0
  177. topos/services/container.py +46 -0
  178. topos/services/embeddings/__init__.py +1 -0
  179. topos/services/embeddings/base.py +7 -0
  180. topos/services/embeddings/local.py +9 -0
  181. topos/services/embeddings/remote.py +9 -0
  182. topos/services/interfaces.py +40 -0
  183. topos/services/llm/__init__.py +1 -0
  184. topos/services/llm/base.py +7 -0
  185. topos/services/llm/openai.py +126 -0
  186. topos/services/local.py +123 -0
  187. topos/services/postgres.py +385 -0
  188. topos/sources/__init__.py +6 -0
  189. topos/sources/definitions.py +114 -0
  190. topos/sources/install_service.py +836 -0
  191. topos/sources/registry.py +263 -0
  192. topos/sources/runtime_install.py +427 -0
  193. topos/storage/__init__.py +1 -0
  194. topos/storage/canonical/__init__.py +18 -0
  195. topos/storage/canonical/ai_chat/__init__.py +22 -0
  196. topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
  197. topos/storage/canonical/ai_chat/mapper.py +168 -0
  198. topos/storage/canonical/ai_chat/model.py +87 -0
  199. topos/storage/canonical/ai_chat/tables.py +179 -0
  200. topos/storage/canonical/canonical_store.py +24 -0
  201. topos/storage/canonical/conversations_tables.py +1020 -0
  202. topos/storage/canonical/mapping_store.py +30 -0
  203. topos/storage/canonical/postgres.py +10 -0
  204. topos/storage/db/__init__.py +1 -0
  205. topos/storage/db/client.py +8 -0
  206. topos/storage/db/migrations/__init__.py +1 -0
  207. topos/storage/db/migrations/stage9_column_renames.py +78 -0
  208. topos/storage/db/paths.py +122 -0
  209. topos/storage/db/postgres.py +240 -0
  210. topos/storage/db/schema.py +6 -0
  211. topos/storage/enrichment/__init__.py +1 -0
  212. topos/storage/enrichment/canonical_enrichment_store.py +7 -0
  213. topos/storage/enrichment/raw_enrichment_store.py +18 -0
  214. topos/storage/normalized/__init__.py +1 -0
  215. topos/storage/normalized/normalized_store.py +24 -0
  216. topos/storage/oplog/__init__.py +1 -0
  217. topos/storage/oplog/decision.py +6 -0
  218. topos/storage/oplog/oplog_store.py +17 -0
  219. topos/storage/oplog/postgres.py +10 -0
  220. topos/storage/projections/__init__.py +1 -0
  221. topos/storage/projections/index_ops_store.py +6 -0
  222. topos/storage/projections/vector_index_store.py +6 -0
  223. topos/storage/raw/__init__.py +1 -0
  224. topos/storage/raw/browser_flat_tables.py +303 -0
  225. topos/storage/raw/file_store.py +100 -0
  226. topos/storage/raw/raw_store.py +29 -0
  227. topos/storage/raw/raw_tables_manager.py +295 -0
  228. topos/storage/raw/sqlite_raw_store.py +17 -0
  229. topos/storage/security/encryption.py +21 -0
  230. topos/storage/signal_identity.py +71 -0
  231. topos/storage/source_settings.py +116 -0
  232. topos/storage/user_identity.py +69 -0
  233. topos/sync/__init__.py +5 -0
  234. topos/sync/client.py +272 -0
  235. topos/sync_handlers.py +70 -0
  236. topos/testing/__init__.py +1 -0
  237. topos/testing/lifespan.py +7 -0
  238. topos/uma_contact_enrichment.py +1032 -0
  239. topos/uma_filters.py +669 -0
  240. topos/uma_resource_id.py +24 -0
  241. topos/uma_rpt.py +69 -0
  242. topos/utils/base_object.py +61 -0
  243. topos/websocket_client.py +21 -0
  244. topos_node-0.1.0.dist-info/METADATA +199 -0
  245. topos_node-0.1.0.dist-info/RECORD +249 -0
  246. topos_node-0.1.0.dist-info/WHEEL +5 -0
  247. topos_node-0.1.0.dist-info/entry_points.txt +2 -0
  248. topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
  249. topos_node-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict, Optional
5
+
6
+
7
@dataclass(frozen=True)
class MappingRecord:
    """Frozen record holding a source id, a source record id and a canonical id."""

    source_id: str
    source_record_id: str
    canonical_id: str
12
+
13
+
14
class MappingStore:
    """Interface for reading and writing ``MappingRecord`` objects.

    Both methods raise ``NotImplementedError`` here; subclasses override them.
    """

    def get_mapping(self, source_id: str, source_record_id: str) -> Optional[MappingRecord]:
        """Return the record stored for the given id pair, or ``None``."""
        raise NotImplementedError

    def save_mapping(self, record: MappingRecord) -> None:
        """Store ``record``."""
        raise NotImplementedError


class InMemoryMappingStore(MappingStore):
    """``MappingStore`` backed by a plain dictionary held on the instance."""

    def __init__(self):
        # Keyed by the (source_id, source_record_id) tuple. A joined
        # "source_id:source_record_id" string would make ("a:b", "c") and
        # ("a", "b:c") overwrite each other.
        self._records: Dict[tuple[str, str], MappingRecord] = {}

    def get_mapping(self, source_id: str, source_record_id: str) -> Optional[MappingRecord]:
        """Return the record saved under this id pair, or ``None`` if absent."""
        return self._records.get((source_id, source_record_id))

    def save_mapping(self, record: MappingRecord) -> None:
        """Store ``record``, replacing any record saved under the same id pair."""
        self._records[(record.source_id, record.source_record_id)] = record
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+
4
class PostgresCanonicalStore:
    """Canonical store bound to a connection; ``upsert`` is not implemented yet."""

    def __init__(self, conn):
        self.conn = conn

    def upsert(self, record):
        """Always raise ``NotImplementedError``; ``record`` is ignored."""
        del record  # intentionally unused
        raise NotImplementedError("PostgresCanonicalStore not implemented yet")
@@ -0,0 +1 @@
1
+ """Database helpers for Topos storage."""
@@ -0,0 +1,8 @@
1
+ from __future__ import annotations
2
+
3
+
4
class DBClient:
    """Placeholder database client that only keeps the connection it is given."""

    def __init__(self, conn):
        # Stored as-is; nothing else is done with it in this class.
        self.conn = conn
@@ -0,0 +1 @@
1
+ """Migration package placeholder."""
@@ -0,0 +1,78 @@
1
+ """
2
+ Stage 9 column renames: apply standardized names to existing DBs.
3
+ SQLite 3.25.1+ RENAME COLUMN. Idempotent: skips if new name already exists or old missing.
4
+ Source: docs/SCHEMA_CONVENTIONS.md §7.0.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ import sqlite3
11
+ from typing import Any, List, Optional, Tuple
12
+
13
+ logger = logging.getLogger("topos.storage.db.migrations.stage9")
14
+
15
# Each entry is (table, old_column, new_column); entries are listed in
# dependency order and processed top to bottom.
RENAME_TARGETS: List[Tuple[str, str, str]] = [
    # ai_chat_conversations
    ("ai_chat_conversations", "source", "source_id"),
    # conversation_messages
    ("conversation_messages", "ts", "event_at"),
    ("conversation_messages", "from_self", "is_from_self"),
    # ai_chat_messages
    ("ai_chat_messages", "ts", "event_at"),
    ("ai_chat_messages", "seq", "sequence"),
    # message_emotions
    ("message_emotions", "model", "model_name"),
    ("message_emotions", "all_emotions", "all_emotions_json"),
    # browser_url_classification
    ("browser_url_classification", "source_table", "enriched_from_table"),
]
26
+
27
+
28
+ def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool:
29
+ cursor = conn.execute(f"PRAGMA table_info({table})")
30
+ rows = cursor.fetchall()
31
+ # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
32
+ names = [row[1] for row in rows]
33
+ return column in names
34
+
35
+
36
+ def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
37
+ cursor = conn.execute(
38
+ "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
39
+ (table,),
40
+ )
41
+ return cursor.fetchone() is not None
42
+
43
+
44
def apply_stage9_renames(conn: sqlite3.Connection) -> dict[str, Any]:
    """Rename every column listed in ``RENAME_TARGETS`` that still needs it.

    A target is skipped when its table is missing, its old column is missing,
    or its new column already exists, which makes repeated runs safe. Each
    successful rename is committed on its own; a failing rename is rolled
    back, recorded, and does not stop the remaining targets.

    Returns:
        A dict with keys ``applied`` and ``skipped`` (lists of
        ``(table, old, new)`` tuples) and ``errors`` (list of messages).
    """
    outcome: dict[str, Any] = {"applied": [], "skipped": [], "errors": []}

    for target in RENAME_TARGETS:
        table, old_col, new_col = target
        # Checks run in this order and stop at the first one that applies.
        nothing_to_do = (
            not _table_exists(conn, table)
            or not _column_exists(conn, table, old_col)
            or _column_exists(conn, table, new_col)
        )
        if nothing_to_do:
            outcome["skipped"].append((table, old_col, new_col))
            continue

        try:
            conn.execute(f'ALTER TABLE "{table}" RENAME COLUMN "{old_col}" TO "{new_col}"')
            conn.commit()
            outcome["applied"].append((table, old_col, new_col))
            logger.info("Stage9 migration: %s.%s -> %s", table, old_col, new_col)
        except Exception as exc:
            conn.rollback()
            outcome["errors"].append(f"{table}.{old_col}->{new_col}: {exc}")
            logger.warning("Stage9 migration failed: %s.%s -> %s: %s", table, old_col, new_col, exc)

    return outcome
74
+
75
+
76
def run_stage9_migrations(conn: sqlite3.Connection) -> dict[str, Any]:
    """Run the Stage 9 migrations, which currently consist of the column renames only."""
    rename_report = apply_stage9_renames(conn)
    return rename_report
@@ -0,0 +1,122 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import platform
7
+ import shutil
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ logger = logging.getLogger("topos.storage.db.paths")
12
+
13
+
14
def get_database_path(base_path: Optional[str]) -> Path:
    """Return ``base_path`` as a ``Path``, or ``~/.topos/database.db`` when it is falsy."""
    if not base_path:
        return Path.home().joinpath(".topos", "database.db")
    return Path(base_path)
18
+
19
+
20
def get_config_file() -> Path:
    """Return the path of ``config.json`` inside the data directory."""
    data_dir = get_data_directory()
    return data_dir.joinpath("config.json")
22
+
23
+
24
def load_config() -> dict:
    """Load the JSON config file and return it as a dict.

    Returns:
        The parsed config, or ``{}`` when the file does not exist, cannot be
        read or parsed (a warning is logged), or holds a top-level JSON value
        that is not an object (a warning is logged).
    """
    config_file = get_config_file()
    if not config_file.exists():
        return {}

    try:
        with open(config_file, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception as exc:
        # Best-effort: an unreadable or corrupt config must not stop callers.
        logger.warning("Failed to load config from %s: %s", config_file, exc)
        return {}

    # Callers do key lookups and item assignment on the result, so a list,
    # string or null at the top level is treated as "no config".
    if not isinstance(loaded, dict):
        logger.warning(
            "Ignoring config from %s: expected a JSON object, got %s",
            config_file,
            type(loaded).__name__,
        )
        return {}
    return loaded
33
+
34
+
35
def save_config(config: dict) -> None:
    """Write ``config`` to the config file as indented JSON.

    The parent directory is created if needed. A failure while opening or
    writing the file is logged and re-raised.
    """
    target = get_config_file()
    target.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(target, "w", encoding="utf-8") as handle:
            json.dump(config, handle, indent=2)
    except Exception as exc:
        logger.error("Failed to save config to %s: %s", target, exc)
        raise
44
+
45
+
46
def discover_databases() -> list[Path]:
    """Return the existing database files from the known locations.

    Candidates are checked in this order, and each is kept only if it is an
    existing regular file not already in the result:

    1. ``database_path`` from the config file
    2. ``database.db`` in the data directory
    3. ``~/.topos_engine/database.db`` (legacy location)
    4. the ``TOPOS_DATABASE_PATH`` environment variable

    Every candidate gets the same ``is_file()`` check, so a directory that
    happens to be named ``database.db`` is never reported.
    """
    candidates: list[Path] = []

    config = load_config()
    configured = config.get("database_path") if isinstance(config, dict) else None
    # Ignore null or other non-path values instead of crashing in Path().
    if configured and isinstance(configured, (str, os.PathLike)):
        candidates.append(Path(configured))

    candidates.append(get_data_directory() / "database.db")
    candidates.append(Path.home() / ".topos_engine" / "database.db")

    env_path = os.getenv("TOPOS_DATABASE_PATH")
    if env_path:
        candidates.append(Path(env_path))

    databases: list[Path] = []
    for candidate in candidates:
        if candidate.is_file() and candidate not in databases:
            databases.append(candidate)
    return databases
70
+
71
+
72
def get_data_directory() -> Path:
    """Return the per-user data directory for the current platform.

    - Windows: ``%APPDATA%/ToposControlPlane``, falling back to
      ``~/AppData/Roaming`` when ``APPDATA`` is unset or empty.
    - macOS: ``~/Library/Application Support/ToposControlPlane``.
    - Otherwise: ``$XDG_DATA_HOME/topos-control-plane``, falling back to
      ``~/.local/share`` when ``XDG_DATA_HOME`` is unset or empty.

    The directory is not created here.
    """
    system = platform.system()
    if system == "Windows":
        # An empty APPDATA counts as unset, as XDG_DATA_HOME already does
        # below; otherwise the result would be relative to the working directory.
        appdata = os.getenv("APPDATA")
        base = Path(appdata) if appdata else Path.home() / "AppData" / "Roaming"
        return base / "ToposControlPlane"
    if system == "Darwin":
        return Path.home() / "Library" / "Application Support" / "ToposControlPlane"
    xdg_data = os.getenv("XDG_DATA_HOME")
    if xdg_data:
        return Path(xdg_data) / "topos-control-plane"
    return Path.home() / ".local" / "share" / "topos-control-plane"
83
+
84
+
85
def migrate_legacy_database() -> Optional[Path]:
    """Copy ``~/.topos_engine/database.db`` into the data directory if needed.

    Returns:
        ``None`` when there is no legacy database or the migration fails (the
        failure is logged); the new path when it already exists (a warning is
        logged and nothing is copied) or once the copy and the config update
        both succeed.
    """
    source = Path.home() / ".topos_engine" / "database.db"
    if not source.exists():
        return None

    destination = get_data_directory() / "database.db"
    if destination.exists():
        logger.warning("Both legacy (%s) and new (%s) database exist. Using new location.", source, destination)
        return destination

    migrated: Optional[Path] = None
    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, destination)
        # Record the new location in the config file.
        updated_config = load_config()
        updated_config["database_path"] = str(destination)
        save_config(updated_config)
        logger.info("Migrated database from %s to %s", source, destination)
        migrated = destination
    except Exception as exc:
        logger.error("Failed to migrate database: %s", exc)
    return migrated
106
+
107
+
108
def validate_database(db_path: Path) -> bool:
    """Return True when ``db_path`` is a SQLite file with the required tables.

    The required tables are ``oplog``, ``messages`` and ``engine_config``.

    Args:
        db_path: Candidate database file.

    Returns:
        False when the path is not an existing regular file, when it cannot
        be opened or queried as SQLite (a warning is logged), or when any
        required table is missing; True otherwise.
    """
    if not db_path.is_file():
        return False
    try:
        import sqlite3
        from contextlib import closing

        # closing() releases the connection even when the query raises,
        # e.g. because the file is not a SQLite database.
        with closing(sqlite3.connect(str(db_path))) as conn:
            rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
    except Exception as exc:
        logger.warning("Failed to validate database %s: %s", db_path, exc)
        return False

    existing_tables = {row[0] for row in rows}
    required_tables = {"oplog", "messages", "engine_config"}
    return required_tables.issubset(existing_tables)
@@ -0,0 +1,240 @@
1
+ from __future__ import annotations
2
+
3
+ from contextlib import contextmanager
4
+ import logging
5
+ from typing import Any, Iterable, Sequence, Tuple
6
+
7
+ from ...config.settings import settings
8
+
9
+ logger = logging.getLogger("topos.storage.db.postgres")
10
+
11
+
12
class PostgresConfigurationError(RuntimeError):
    """Raised by this module when the Postgres configuration cannot be used."""
14
+
15
+
16
def _build_postgres_dsn() -> str:
    """Return the connection string to use, based on ``settings``.

    Precedence:
        1. ``settings.topos_postgres_dsn`` if set (returned unchanged).
        2. ``settings.topos_database_service_url`` if set (returned unchanged).
        3. A ``postgresql://`` URI assembled from the host, port, database,
           user and password settings.

    In case 3 the user, password and database name are percent-encoded so
    that characters such as ``@``, ``:`` or ``/`` cannot break the URI.
    NOTE(review): values that were already percent-encoded by hand in the
    settings would now be encoded twice; confirm none are stored that way.

    Raises:
        PostgresConfigurationError: if any of the individual settings needed
            for case 3 is ``None`` or an empty string.
    """
    if settings.topos_postgres_dsn:
        return settings.topos_postgres_dsn
    if settings.topos_database_service_url:
        return settings.topos_database_service_url

    required = {
        "TOPOS_POSTGRES_HOST": settings.topos_postgres_host,
        "TOPOS_POSTGRES_PORT": settings.topos_postgres_port,
        "TOPOS_POSTGRES_DB": settings.topos_postgres_db,
        "TOPOS_POSTGRES_USER": settings.topos_postgres_user,
        "TOPOS_POSTGRES_PASSWORD": settings.topos_postgres_password,
    }
    missing = [key for key, value in required.items() if value in (None, "")]
    if missing:
        raise PostgresConfigurationError(
            "Missing Postgres settings: " + ", ".join(sorted(missing))
        )

    from urllib.parse import quote

    user = quote(str(settings.topos_postgres_user), safe="")
    password = quote(str(settings.topos_postgres_password), safe="")
    database = quote(str(settings.topos_postgres_db), safe="")
    return (
        f"postgresql://{user}:{password}"
        f"@{settings.topos_postgres_host}:{int(settings.topos_postgres_port)}/{database}"
    )
38
+
39
+
40
+ def _is_sqlite_connection(conn: Any) -> bool:
41
+ module_name = conn.__class__.__module__.lower()
42
+ return "sqlite" in module_name
43
+
44
+
45
def _normalize_query_for_connection(conn: Any, query: str) -> str:
    """Return ``query`` with every ``%s`` replaced by ``?`` for SQLite connections.

    For any other connection the query is returned unchanged. The replacement
    is a plain text substitution over the whole query string.
    """
    if not _is_sqlite_connection(conn):
        return query
    return query.replace("%s", "?")
49
+
50
+
51
def _table_exists(conn: Any, table_name: str) -> bool:
    """Return True when ``table_name`` exists on this connection.

    SQLite connections are checked through ``sqlite_master``; all others
    through ``information_schema.tables`` restricted to the ``public`` schema.
    """
    if _is_sqlite_connection(conn):
        found = fetch_one(
            conn,
            "SELECT name FROM sqlite_master WHERE type='table' AND name = ?",
            (table_name,),
        )
    else:
        found = fetch_one(
            conn,
            """
            SELECT 1
            FROM information_schema.tables
            WHERE table_schema = 'public' AND table_name = %s
            LIMIT 1
            """,
            (table_name,),
        )
    return bool(found)
70
+
71
+
72
def _table_has_column(conn: Any, table_name: str, column_name: str) -> bool:
    """Return True when ``table_name`` exists and has a column ``column_name``.

    SQLite connections are inspected with ``PRAGMA table_info``; all others
    through ``information_schema.columns`` restricted to the ``public`` schema.
    """
    if not _table_exists(conn, table_name):
        return False

    if not _is_sqlite_connection(conn):
        match = fetch_one(
            conn,
            """
            SELECT 1
            FROM information_schema.columns
            WHERE table_schema = 'public' AND table_name = %s AND column_name = %s
            LIMIT 1
            """,
            (table_name, column_name),
        )
        return bool(match)

    for info in fetch_all(conn, f'PRAGMA table_info("{table_name}")'):
        # Rows may be dicts keyed by "name" or sequences with the name at index 1.
        name = info["name"] if isinstance(info, dict) else info[1]
        if str(name) == column_name:
            return True
    return False
93
+
94
+
95
def _drop_table_if_exists(conn: Any, table_name: str) -> None:
    """Issue ``DROP TABLE IF EXISTS ... CASCADE`` for ``table_name``."""
    statement = f'DROP TABLE IF EXISTS "{table_name}" CASCADE'
    execute_query(conn, statement)
97
+
98
+
99
def _ensure_postgres_tenant_schema_compat(conn: Any) -> None:
    """Handle legacy tables that lack a ``tenant_id`` column.

    Does nothing for SQLite connections, or when none of ``messages``,
    ``oplog`` and ``engine_config`` exists without ``tenant_id``. Otherwise
    all three tables are dropped, but only when
    ``settings.topos_postgres_reset_incompatible_schema`` is truthy. The reset
    is destructive, so it is opt-in through
    TOPOS_POSTGRES_RESET_INCOMPATIBLE_SCHEMA.

    Raises:
        PostgresConfigurationError: a legacy table was found and the reset
            setting is not enabled.
    """
    if _is_sqlite_connection(conn):
        return

    tenant_tables = ("messages", "oplog", "engine_config")
    legacy_found = False
    for table in tenant_tables:
        if _table_exists(conn, table) and not _table_has_column(conn, table, "tenant_id"):
            legacy_found = True
            break  # one legacy table is enough to require the reset
    if not legacy_found:
        return

    if not settings.topos_postgres_reset_incompatible_schema:
        raise PostgresConfigurationError(
            "Detected legacy Postgres schema without tenant_id on messages/oplog/engine_config. "
            "Set TOPOS_POSTGRES_RESET_INCOMPATIBLE_SCHEMA=true to reset these dev tables, or run a manual migration."
        )

    logger.warning(
        "Resetting legacy Postgres tables (messages, oplog, engine_config) to tenant-aware schema."
    )
    for table in tenant_tables:
        _drop_table_if_exists(conn, table)
129
+
130
+
131
def execute_query(conn: Any, query: str, params: Sequence[Any] = ()) -> None:
    """Execute ``query`` with ``params`` on a short-lived cursor; nothing is returned.

    The query is first passed through ``_normalize_query_for_connection``.
    """
    sql = _normalize_query_for_connection(conn, query)
    bound = tuple(params)
    with _cursor(conn) as cur:
        cur.execute(sql, bound)
135
+
136
+
137
def fetch_all(conn: Any, query: str, params: Sequence[Any] = ()) -> list[Tuple[Any, ...]]:
    """Execute ``query`` and return all rows as a list (empty when there are none).

    The query is first passed through ``_normalize_query_for_connection``.
    """
    sql = _normalize_query_for_connection(conn, query)
    with _cursor(conn) as cur:
        cur.execute(sql, tuple(params))
        fetched = cur.fetchall()
    if not fetched:
        return []
    return list(fetched)
143
+
144
+
145
def fetch_one(conn: Any, query: str, params: Sequence[Any] = ()) -> Tuple[Any, ...] | None:
    """Execute ``query`` and return whatever the cursor's ``fetchone()`` yields.

    The query is first passed through ``_normalize_query_for_connection``.
    """
    sql = _normalize_query_for_connection(conn, query)
    with _cursor(conn) as cur:
        cur.execute(sql, tuple(params))
        first = cur.fetchone()
    return first
151
+
152
+
153
+ @contextmanager
154
+ def _cursor(conn: Any):
155
+ cursor = conn.cursor()
156
+ try:
157
+ yield cursor
158
+ finally:
159
+ try:
160
+ cursor.close()
161
+ except Exception:
162
+ pass
163
+
164
+
165
def ensure_postgres_schema(conn: Any) -> None:
    """Create the ``messages``, ``oplog`` and ``engine_config`` tables if missing.

    Runs the tenant-schema compatibility check first, then executes the
    ``CREATE ... IF NOT EXISTS`` statements for the three tables and the two
    indexes, in the order listed below.
    """
    _ensure_postgres_tenant_schema_compat(conn)

    messages_table = """
        CREATE TABLE IF NOT EXISTS messages (
            tenant_id TEXT NOT NULL,
            dataset_id TEXT NOT NULL,
            message_id TEXT PRIMARY KEY,
            sender_type TEXT NOT NULL,
            content TEXT NOT NULL,
            ts TEXT NOT NULL,
            user_id TEXT
        )
        """
    messages_index = (
        "CREATE INDEX IF NOT EXISTS idx_messages_tenant_dataset_ts ON messages (tenant_id, dataset_id, ts DESC)"
    )
    oplog_table = """
        CREATE TABLE IF NOT EXISTS oplog (
            tenant_id TEXT NOT NULL,
            dataset_id TEXT NOT NULL,
            op_id TEXT PRIMARY KEY,
            op_type TEXT NOT NULL,
            payload_json TEXT NOT NULL,
            hlc_ts TEXT NOT NULL
        )
        """
    oplog_index = (
        "CREATE INDEX IF NOT EXISTS idx_oplog_tenant_dataset_ts ON oplog (tenant_id, dataset_id, hlc_ts DESC)"
    )
    engine_config_table = """
        CREATE TABLE IF NOT EXISTS engine_config (
            tenant_id TEXT NOT NULL,
            key TEXT NOT NULL,
            value TEXT NOT NULL,
            PRIMARY KEY (tenant_id, key)
        )
        """

    for ddl in (messages_table, messages_index, oplog_table, oplog_index, engine_config_table):
        execute_query(conn, ddl)
202
+
203
+
204
@contextmanager
def connect_postgres():
    """Yield a database connection whose schema has been ensured.

    A DSN starting with ``sqlite://`` opens a ``sqlite3`` connection (with
    ``sqlite3.Row`` as row factory) on the remainder of the DSN; any other
    DSN is opened with ``psycopg``. On normal exit the connection is
    committed; on an exception it is rolled back and the exception re-raised;
    it is closed in both cases.

    Raises:
        PostgresConfigurationError: ``psycopg`` is needed but cannot be imported.
    """
    dsn = _build_postgres_dsn()

    if dsn.startswith("sqlite://"):
        import sqlite3

        conn = sqlite3.connect(dsn.replace("sqlite://", "", 1))
        conn.row_factory = sqlite3.Row
    else:
        try:
            import psycopg
        except Exception as exc:  # pragma: no cover - depends on environment extras
            raise PostgresConfigurationError(
                "psycopg is required for postgres database_mode. Install optional dependency: psycopg[binary]."
            ) from exc

        conn = psycopg.connect(dsn)

    # Both drivers share the same commit / rollback / close lifecycle.
    try:
        ensure_postgres_schema(conn)
        yield conn
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
4
def ensure_schema(conn) -> None:
    """Stub for ensuring the DB schema exists; currently ignores ``conn`` and does nothing."""
    del conn  # unused until this stub is implemented
@@ -0,0 +1 @@
1
+ """Enrichment storage abstractions."""
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from .raw_enrichment_store import EnrichmentStore
4
+
5
+
6
class CanonicalEnrichmentStore(EnrichmentStore):
    """``EnrichmentStore`` subclass for canonical enrichment results.

    Adds no behavior of its own.
    """
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+
7
@dataclass(frozen=True)
class EnrichmentRef:
    """Frozen reference carrying a single ``record_id`` string."""

    record_id: str
10
+
11
+
12
class EnrichmentStore:
    """Interface for writing enrichment results; ``write`` must be overridden."""

    def write(self, result: Dict[str, str]) -> EnrichmentRef:
        """Store ``result`` and return a reference to it (not implemented here)."""
        raise NotImplementedError
15
+
16
+
17
class RawEnrichmentStore(EnrichmentStore):
    """``EnrichmentStore`` subclass for raw enrichment results.

    Adds no behavior of its own.
    """
@@ -0,0 +1 @@
1
+ """Normalized storage abstractions."""
@@ -0,0 +1,24 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+
7
@dataclass(frozen=True)
class NormalizedRef:
    """Frozen reference carrying a single ``record_id`` string."""

    record_id: str
10
+
11
+
12
class NormalizedStore:
    """Interface for writing normalized records; ``write`` must be overridden."""

    def write(self, record: Dict[str, str]) -> NormalizedRef:
        """Store ``record`` and return a reference to it (not implemented here)."""
        raise NotImplementedError
15
+
16
+
17
class InMemoryNormalizedStore(NormalizedStore):
    """``NormalizedStore`` that keeps records in a dictionary on the instance."""

    def __init__(self):
        self._records: Dict[str, Dict[str, str]] = {}

    def write(self, record: Dict[str, str]) -> NormalizedRef:
        """Store ``record`` under its id and return a reference to that id.

        The id is the first truthy value among ``record["record_id"]`` and
        ``record["message_id"]``, or ``""`` when neither is present or truthy.
        A record written under an id that is already in use replaces the
        earlier one.
        """
        key = ""
        for field in ("record_id", "message_id"):
            candidate = record.get(field)
            if candidate:
                key = candidate
                break
        self._records[key] = record
        return NormalizedRef(record_id=key)
@@ -0,0 +1 @@
1
+ """Oplog storage abstractions."""
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
4
def should_write_to_oplog(op_type: str, payload: dict, enable_sync: bool) -> bool:
    """Return True unconditionally; all three arguments are currently ignored."""
    del op_type, payload, enable_sync  # accepted for the signature only
    return True
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Dict
5
+
6
+
7
@dataclass(frozen=True)
class OplogEntry:
    """Frozen oplog record: operation id, dataset id, operation type and payload."""

    op_id: str
    dataset_id: str
    op_type: str
    payload: Dict[str, str]
13
+
14
+
15
class OplogStore:
    """Interface for appending oplog entries; ``append`` must be overridden."""

    def append(self, entry: OplogEntry) -> None:
        """Append ``entry`` to the log (not implemented here)."""
        raise NotImplementedError
@@ -0,0 +1,10 @@
1
+ from __future__ import annotations
2
+
3
+
4
class PostgresOplogStore:
    """Oplog store bound to a connection; ``append`` is not implemented yet."""

    def __init__(self, conn):
        self.conn = conn

    def append(self, entry):
        """Always raise ``NotImplementedError``; ``entry`` is ignored."""
        del entry  # intentionally unused
        raise NotImplementedError("PostgresOplogStore not implemented yet")
@@ -0,0 +1 @@
1
+ """Projection storage abstractions."""
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
4
class IndexOpsStore:
    """Placeholder store for index-operation metrics."""

    def record_metric(self, metric: dict) -> None:
        """Accept ``metric`` and discard it; nothing is stored."""
        del metric  # intentionally unused
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+
4
class VectorIndexStore:
    """Placeholder store for vector-index run metadata."""

    def record_run(self, metadata: dict) -> None:
        """Accept ``metadata`` and discard it; nothing is stored."""
        del metadata  # intentionally unused
@@ -0,0 +1 @@
1
+ """Raw storage abstractions."""