topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class MappingRecord:
    """Immutable record tying a (source_id, source_record_id) pair to a canonical_id."""

    # First half of the lookup pair used by the mapping stores.
    source_id: str
    # Second half of the lookup pair used by the mapping stores.
    source_record_id: str
    # Identifier the pair resolves to.
    canonical_id: str
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MappingStore:
    """Interface for looking up and persisting MappingRecord entries.

    Both methods raise NotImplementedError here; subclasses supply the storage.
    """

    def get_mapping(self, source_id: str, source_record_id: str) -> Optional[MappingRecord]:
        """Return the record for the given pair, or None when there is none."""
        raise NotImplementedError

    def save_mapping(self, record: MappingRecord) -> None:
        """Persist ``record``."""
        raise NotImplementedError
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class InMemoryMappingStore(MappingStore):
    """Dictionary-backed MappingStore whose contents live only in process memory."""

    def __init__(self):
        # Keyed by the (source_id, source_record_id) tuple. A joined
        # "source:record" string is ambiguous when either id contains ":"
        # (("a:b", "c") and ("a", "b:c") would share one key); a tuple is not.
        self._records: dict[tuple[str, str], MappingRecord] = {}

    def get_mapping(self, source_id: str, source_record_id: str) -> Optional[MappingRecord]:
        """Return the record saved for the pair, or None when nothing was saved."""
        return self._records.get((source_id, source_record_id))

    def save_mapping(self, record: MappingRecord) -> None:
        """Store ``record``, replacing any earlier record saved for the same pair."""
        self._records[(record.source_id, record.source_record_id)] = record
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Database helpers for Topos storage."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Migration package placeholder."""
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stage 9 column renames: apply standardized names to existing DBs.
|
|
3
|
+
SQLite 3.25.1+ RENAME COLUMN. Idempotent: skips if new name already exists or old missing.
|
|
4
|
+
Source: docs/SCHEMA_CONVENTIONS.md §7.0.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import sqlite3
|
|
11
|
+
from typing import Any, List, Optional, Tuple
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("topos.storage.db.migrations.stage9")
|
|
14
|
+
|
|
15
|
+
# (table, old_column, new_column) in dependency order
# apply_stage9_renames walks this list top to bottom, one rename per entry.
RENAME_TARGETS: List[Tuple[str, str, str]] = [
    ("ai_chat_conversations", "source", "source_id"),
    ("conversation_messages", "ts", "event_at"),
    ("conversation_messages", "from_self", "is_from_self"),
    ("ai_chat_messages", "ts", "event_at"),
    ("ai_chat_messages", "seq", "sequence"),
    ("message_emotions", "model", "model_name"),
    ("message_emotions", "all_emotions", "all_emotions_json"),
    ("browser_url_classification", "source_table", "enriched_from_table"),
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _column_exists(conn: sqlite3.Connection, table: str, column: str) -> bool:
|
|
29
|
+
cursor = conn.execute(f"PRAGMA table_info({table})")
|
|
30
|
+
rows = cursor.fetchall()
|
|
31
|
+
# PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
|
|
32
|
+
names = [row[1] for row in rows]
|
|
33
|
+
return column in names
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
|
|
37
|
+
cursor = conn.execute(
|
|
38
|
+
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
|
|
39
|
+
(table,),
|
|
40
|
+
)
|
|
41
|
+
return cursor.fetchone() is not None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def apply_stage9_renames(conn: sqlite3.Connection) -> dict[str, Any]:
    """
    Rename every Stage 9 column that still carries its old name.

    Walks RENAME_TARGETS in order. A target is skipped when its table is
    missing, its old column is missing, or its new column already exists, so
    running this again after a successful pass changes nothing. Each
    successful rename is committed on its own; a failing rename is rolled
    back, recorded, and the loop continues with the next target.

    Returns a dict with "applied" and "skipped" (lists of (table, old, new)
    tuples) and "errors" (list of message strings).
    """
    done: List[Tuple[str, str, str]] = []
    passed_over: List[Tuple[str, str, str]] = []
    failures: List[str] = []

    for table, old_col, new_col in RENAME_TARGETS:
        renameable = (
            _table_exists(conn, table)
            and _column_exists(conn, table, old_col)
            and not _column_exists(conn, table, new_col)
        )
        if not renameable:
            passed_over.append((table, old_col, new_col))
            continue

        statement = f'ALTER TABLE "{table}" RENAME COLUMN "{old_col}" TO "{new_col}"'
        try:
            conn.execute(statement)
            conn.commit()
            done.append((table, old_col, new_col))
            logger.info("Stage9 migration: %s.%s -> %s", table, old_col, new_col)
        except Exception as exc:
            # Undo the failed rename and keep going with the remaining targets.
            conn.rollback()
            failures.append(f"{table}.{old_col}->{new_col}: {exc}")
            logger.warning("Stage9 migration failed: %s.%s -> %s: %s", table, old_col, new_col, exc)

    return {"applied": done, "skipped": passed_over, "errors": failures}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_stage9_migrations(conn: sqlite3.Connection) -> dict[str, Any]:
    """Entry point for Stage 9; at present it only applies the column renames."""
    rename_report = apply_stage9_renames(conn)
    return rename_report
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import platform
|
|
7
|
+
import shutil
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger("topos.storage.db.paths")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_database_path(base_path: Optional[str]) -> Path:
    """Resolve the database file location.

    A non-empty ``base_path`` is returned as a Path unchanged; None or ""
    falls back to ``~/.topos/database.db``.
    """
    if not base_path:
        return Path.home() / ".topos" / "database.db"
    return Path(base_path)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_config_file() -> Path:
    """Return the path of ``config.json`` inside the data directory."""
    data_dir = get_data_directory()
    return data_dir / "config.json"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def load_config() -> dict:
    """Load the JSON config file and return its contents as a dict.

    Returns an empty dict when the file does not exist, when it cannot be
    read or parsed (a warning is logged), or when its top-level JSON value is
    not an object. The last case protects callers in this module, which look
    up and assign keys on the result.
    """
    config_file = get_config_file()
    if not config_file.exists():
        return {}
    try:
        with open(config_file, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except Exception as exc:
        logger.warning("Failed to load config from %s: %s", config_file, exc)
        return {}
    if not isinstance(loaded, dict):
        # Valid JSON but not an object (e.g. a list or a bare string).
        logger.warning(
            "Ignoring config at %s: expected a JSON object, got %s",
            config_file,
            type(loaded).__name__,
        )
        return {}
    return loaded
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def save_config(config: dict) -> None:
    """Write ``config`` to the config file as indented JSON.

    The parent directory is created when needed. A failure while opening or
    writing the file is logged and then re-raised.
    """
    target = get_config_file()
    target.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(target, "w", encoding="utf-8") as handle:
            json.dump(config, handle, indent=2)
    except Exception as err:
        logger.error("Failed to save config to %s: %s", target, err)
        raise
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def discover_databases() -> list[Path]:
    """Collect existing database files from the known locations.

    Candidates are checked in this order: the config file's "database_path",
    ``database.db`` in the data directory, the legacy
    ``~/.topos_engine/database.db``, and the TOPOS_DATABASE_PATH environment
    variable. Only regular files are returned (a directory at a candidate
    path is ignored for every candidate), and duplicates are dropped while
    keeping the first occurrence.
    """
    candidates: list[Path] = []

    config = load_config()
    configured = config.get("database_path") if isinstance(config, dict) else None
    # Skip null or non-string values instead of letting Path() raise TypeError.
    if isinstance(configured, str) and configured:
        candidates.append(Path(configured))

    candidates.append(get_data_directory() / "database.db")
    candidates.append(Path.home() / ".topos_engine" / "database.db")

    env_path = os.getenv("TOPOS_DATABASE_PATH")
    if env_path:
        candidates.append(Path(env_path))

    databases: list[Path] = []
    for candidate in candidates:
        # is_file() is False for missing paths, so it covers the exists() check too.
        if candidate.is_file() and candidate not in databases:
            databases.append(candidate)
    return databases
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_data_directory() -> Path:
    """Return the per-platform directory that holds application data.

    - Windows: ``%APPDATA%/ToposControlPlane`` (``~/AppData/Roaming`` when
      APPDATA is unset).
    - macOS: ``~/Library/Application Support/ToposControlPlane``.
    - Anything else: ``$XDG_DATA_HOME/topos-control-plane`` when XDG_DATA_HOME
      is set and non-empty, otherwise ``~/.local/share/topos-control-plane``.
    """
    os_name = platform.system()
    if os_name == "Windows":
        roaming = os.getenv("APPDATA", Path.home() / "AppData" / "Roaming")
        return Path(roaming) / "ToposControlPlane"
    if os_name == "Darwin":
        return Path.home() / "Library" / "Application Support" / "ToposControlPlane"

    xdg_root = os.getenv("XDG_DATA_HOME")
    data_root = Path(xdg_root) if xdg_root else Path.home() / ".local" / "share"
    return data_root / "topos-control-plane"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def migrate_legacy_database() -> Optional[Path]:
    """Copy the legacy database into the current data directory.

    Returns None when there is no ``~/.topos_engine/database.db``, or when the
    copy or the config update fails (the error is logged). Returns the new
    path when a database already exists there (a warning is logged and nothing
    is copied) or after a successful copy, which also stores the new path in
    the config under "database_path". The legacy file itself is left in place.
    """
    source = Path.home() / ".topos_engine" / "database.db"
    if not source.exists():
        return None

    destination = get_data_directory() / "database.db"
    if destination.exists():
        logger.warning("Both legacy (%s) and new (%s) database exist. Using new location.", source, destination)
        return destination

    try:
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, destination)
        updated_config = load_config()
        updated_config["database_path"] = str(destination)
        save_config(updated_config)
        logger.info("Migrated database from %s to %s", source, destination)
    except Exception as err:
        logger.error("Failed to migrate database: %s", err)
        return None
    return destination
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def validate_database(db_path: Path) -> bool:
    """Return True when ``db_path`` is a SQLite file containing the required tables.

    The required tables are ``oplog``, ``messages`` and ``engine_config``.
    A missing path, a path that is not a regular file, or any error while
    opening or querying the file gives False; errors are logged as warnings.
    """
    if not db_path.is_file():
        return False
    try:
        import sqlite3

        conn = sqlite3.connect(str(db_path))
        try:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
            existing_tables = {row[0] for row in cursor}
        finally:
            # Close even when the query raises (e.g. the file is not a database),
            # so a failed validation does not leave the connection open.
            conn.close()
    except Exception as exc:
        logger.warning("Failed to validate database %s: %s", db_path, exc)
        return False
    required_tables = {"oplog", "messages", "engine_config"}
    return required_tables.issubset(existing_tables)
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Any, Iterable, Sequence, Tuple
|
|
6
|
+
|
|
7
|
+
from ...config.settings import settings
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("topos.storage.db.postgres")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PostgresConfigurationError(RuntimeError):
    """Raised when the Postgres backend is misconfigured or cannot be used as configured."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _build_postgres_dsn() -> str:
    """Return the connection string to use for the Postgres backend.

    Precedence: ``settings.topos_postgres_dsn``, then
    ``settings.topos_database_service_url`` (both returned verbatim), then a
    ``postgresql://`` URI assembled from the discrete host/port/db/user/
    password settings.

    Raises:
        PostgresConfigurationError: when the URI has to be assembled and any
            of the discrete settings is None or "".
    """
    from urllib.parse import quote

    if settings.topos_postgres_dsn:
        return settings.topos_postgres_dsn
    if settings.topos_database_service_url:
        return settings.topos_database_service_url

    required = {
        "TOPOS_POSTGRES_HOST": settings.topos_postgres_host,
        "TOPOS_POSTGRES_PORT": settings.topos_postgres_port,
        "TOPOS_POSTGRES_DB": settings.topos_postgres_db,
        "TOPOS_POSTGRES_USER": settings.topos_postgres_user,
        "TOPOS_POSTGRES_PASSWORD": settings.topos_postgres_password,
    }
    missing = [key for key, value in required.items() if value in (None, "")]
    if missing:
        raise PostgresConfigurationError(
            "Missing Postgres settings: " + ", ".join(sorted(missing))
        )

    # Percent-encode the parts that may hold URI-reserved characters; a raw
    # "@", ":" or "/" in a password would otherwise change how the URI parses.
    # NOTE(review): assumes the discrete settings hold raw (not already
    # percent-encoded) values - confirm against how settings are populated.
    user = quote(str(settings.topos_postgres_user), safe="")
    password = quote(str(settings.topos_postgres_password), safe="")
    database = quote(str(settings.topos_postgres_db), safe="")
    return (
        f"postgresql://{user}:{password}"
        f"@{settings.topos_postgres_host}:{int(settings.topos_postgres_port)}/{database}"
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _is_sqlite_connection(conn: Any) -> bool:
|
|
41
|
+
module_name = conn.__class__.__module__.lower()
|
|
42
|
+
return "sqlite" in module_name
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _normalize_query_for_connection(conn: Any, query: str) -> str:
    """Adapt placeholder style to the connection.

    For SQLite connections every ``%s`` in ``query`` becomes ``?``; for any
    other connection the query is returned unchanged.
    """
    if not _is_sqlite_connection(conn):
        return query
    return query.replace("%s", "?")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _table_exists(conn: Any, table_name: str) -> bool:
    """Return True when a table named ``table_name`` exists.

    SQLite connections are checked against ``sqlite_master``; any other
    connection is checked against ``information_schema.tables`` restricted to
    the ``public`` schema.
    """
    if _is_sqlite_connection(conn):
        row = fetch_one(
            conn,
            "SELECT name FROM sqlite_master WHERE type='table' AND name = ?",
            (table_name,),
        )
        return bool(row)
    # Non-SQLite path: only tables in the 'public' schema are considered.
    row = fetch_one(
        conn,
        """
        SELECT 1
        FROM information_schema.tables
        WHERE table_schema = 'public' AND table_name = %s
        LIMIT 1
        """,
        (table_name,),
    )
    return bool(row)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _table_has_column(conn: Any, table_name: str, column_name: str) -> bool:
    """Return True when ``table_name`` exists and has a column ``column_name``.

    A missing table gives False. SQLite connections are inspected with
    ``PRAGMA table_info``; any other connection is checked against
    ``information_schema.columns`` restricted to the ``public`` schema.
    """
    if not _table_exists(conn, table_name):
        return False
    if _is_sqlite_connection(conn):
        rows = fetch_all(conn, f'PRAGMA table_info("{table_name}")')
        for row in rows:
            # Dict rows are read by key; anything else is read by position,
            # where index 1 is the column name in PRAGMA table_info output.
            col = row["name"] if isinstance(row, dict) else row[1]
            if str(col) == column_name:
                return True
        return False
    row = fetch_one(
        conn,
        """
        SELECT 1
        FROM information_schema.columns
        WHERE table_schema = 'public' AND table_name = %s AND column_name = %s
        LIMIT 1
        """,
        (table_name, column_name),
    )
    return bool(row)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _drop_table_if_exists(conn: Any, table_name: str) -> None:
    """Run ``DROP TABLE IF EXISTS ... CASCADE`` for the double-quoted ``table_name``."""
    drop_sql = f'DROP TABLE IF EXISTS "{table_name}" CASCADE'
    execute_query(conn, drop_sql)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _ensure_postgres_tenant_schema_compat(conn: Any) -> None:
    """
    Guard against legacy tables that lack a ``tenant_id`` column.

    Does nothing for SQLite connections. Otherwise, if any of ``messages``,
    ``oplog`` or ``engine_config`` exists without ``tenant_id``:

    - with ``settings.topos_postgres_reset_incompatible_schema`` unset, raise
      PostgresConfigurationError;
    - with it set, log a warning and drop all three tables (destructive).
    """
    if _is_sqlite_connection(conn):
        return

    tenant_tables = ("messages", "oplog", "engine_config")
    legacy_found = False
    for name in tenant_tables:
        if _table_exists(conn, name) and not _table_has_column(conn, name, "tenant_id"):
            legacy_found = True
            break
    if not legacy_found:
        return

    if not settings.topos_postgres_reset_incompatible_schema:
        raise PostgresConfigurationError(
            "Detected legacy Postgres schema without tenant_id on messages/oplog/engine_config. "
            "Set TOPOS_POSTGRES_RESET_INCOMPATIBLE_SCHEMA=true to reset these dev tables, or run a manual migration."
        )

    logger.warning(
        "Resetting legacy Postgres tables (messages, oplog, engine_config) to tenant-aware schema."
    )
    # All three tables are dropped, including any that already had tenant_id.
    for name in tenant_tables:
        _drop_table_if_exists(conn, name)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def execute_query(conn: Any, query: str, params: Sequence[Any] = ()) -> None:
    """Execute one statement on ``conn`` and discard any result.

    The query's placeholders are first adapted to the connection type and
    ``params`` is passed to the cursor as a tuple.
    """
    sql = _normalize_query_for_connection(conn, query)
    with _cursor(conn) as cur:
        cur.execute(sql, tuple(params))
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def fetch_all(conn: Any, query: str, params: Sequence[Any] = ()) -> list[Tuple[Any, ...]]:
    """Execute ``query`` and return every row as a list.

    The query's placeholders are first adapted to the connection type. An
    empty or falsy ``fetchall()`` result is returned as ``[]``.
    """
    sql = _normalize_query_for_connection(conn, query)
    with _cursor(conn) as cur:
        cur.execute(sql, tuple(params))
        fetched = cur.fetchall()
    if not fetched:
        return []
    return list(fetched)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def fetch_one(conn: Any, query: str, params: Sequence[Any] = ()) -> Tuple[Any, ...] | None:
    """Execute ``query`` and return whatever the cursor's ``fetchone()`` returns.

    The query's placeholders are first adapted to the connection type.
    """
    sql = _normalize_query_for_connection(conn, query)
    with _cursor(conn) as cur:
        cur.execute(sql, tuple(params))
        first_row = cur.fetchone()
    return first_row
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@contextmanager
|
|
154
|
+
def _cursor(conn: Any):
|
|
155
|
+
cursor = conn.cursor()
|
|
156
|
+
try:
|
|
157
|
+
yield cursor
|
|
158
|
+
finally:
|
|
159
|
+
try:
|
|
160
|
+
cursor.close()
|
|
161
|
+
except Exception:
|
|
162
|
+
pass
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def ensure_postgres_schema(conn: Any) -> None:
    """Create the ``messages``, ``oplog`` and ``engine_config`` tables and their indexes.

    Runs the legacy-schema compatibility check first, then executes each
    CREATE statement through execute_query. Every statement uses
    ``IF NOT EXISTS``. No commit is issued here.
    """
    # May raise PostgresConfigurationError or drop legacy tables before anything is created.
    _ensure_postgres_tenant_schema_compat(conn)
    statements: Iterable[str] = (
        """
        CREATE TABLE IF NOT EXISTS messages (
            tenant_id TEXT NOT NULL,
            dataset_id TEXT NOT NULL,
            message_id TEXT PRIMARY KEY,
            sender_type TEXT NOT NULL,
            content TEXT NOT NULL,
            ts TEXT NOT NULL,
            user_id TEXT
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_messages_tenant_dataset_ts ON messages (tenant_id, dataset_id, ts DESC)",
        """
        CREATE TABLE IF NOT EXISTS oplog (
            tenant_id TEXT NOT NULL,
            dataset_id TEXT NOT NULL,
            op_id TEXT PRIMARY KEY,
            op_type TEXT NOT NULL,
            payload_json TEXT NOT NULL,
            hlc_ts TEXT NOT NULL
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_oplog_tenant_dataset_ts ON oplog (tenant_id, dataset_id, hlc_ts DESC)",
        """
        CREATE TABLE IF NOT EXISTS engine_config (
            tenant_id TEXT NOT NULL,
            key TEXT NOT NULL,
            value TEXT NOT NULL,
            PRIMARY KEY (tenant_id, key)
        )
        """,
    )
    # Executed in the order listed: each table before the index that refers to it.
    for statement in statements:
        execute_query(conn, statement)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
@contextmanager
def connect_postgres():
    """Open a database connection, ensure the schema, and manage the transaction.

    The DSN comes from _build_postgres_dsn. A DSN starting with ``sqlite://``
    opens a sqlite3 connection (rows as ``sqlite3.Row``) on the remainder of
    the DSN; any other DSN is opened with psycopg, and a failed psycopg
    import raises PostgresConfigurationError.

    The schema is ensured before the connection is yielded. On normal exit
    the connection is committed; on any exception it is rolled back and the
    exception re-raised. The connection is always closed.
    """
    dsn = _build_postgres_dsn()

    if dsn.startswith("sqlite://"):
        import sqlite3

        database_file = dsn.replace("sqlite://", "", 1)
        connection = sqlite3.connect(database_file)
        connection.row_factory = sqlite3.Row
    else:
        try:
            import psycopg
        except Exception as exc:  # pragma: no cover - depends on environment extras
            raise PostgresConfigurationError(
                "psycopg is required for postgres database_mode. Install optional dependency: psycopg[binary]."
            ) from exc

        connection = psycopg.connect(dsn)

    # Same transaction handling for both backends.
    try:
        ensure_postgres_schema(connection)
        yield connection
        connection.commit()
    except Exception:
        connection.rollback()
        raise
    finally:
        connection.close()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Enrichment storage abstractions."""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class EnrichmentRef:
    """Immutable handle returned by EnrichmentStore.write."""

    # Identifier carried by the handle.
    record_id: str
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class EnrichmentStore:
    """Interface for persisting enrichment results; subclasses implement ``write``."""

    def write(self, result: Dict[str, str]) -> EnrichmentRef:
        """Persist ``result`` and return a reference to it. Not implemented here."""
        raise NotImplementedError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class RawEnrichmentStore(EnrichmentStore):
    """EnrichmentStore for raw enrichment results; adds nothing to the base class."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Normalized storage abstractions."""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class NormalizedRef:
    """Immutable handle returned by NormalizedStore.write."""

    # Key under which the record was written.
    record_id: str
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NormalizedStore:
    """Interface for persisting normalized records; subclasses implement ``write``."""

    def write(self, record: Dict[str, str]) -> NormalizedRef:
        """Persist ``record`` and return a reference to it. Not implemented here."""
        raise NotImplementedError
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class InMemoryNormalizedStore(NormalizedStore):
    """NormalizedStore that keeps records in a dict for the life of the process."""

    def __init__(self):
        self._records: Dict[str, Dict[str, str]] = {}

    def write(self, record: Dict[str, str]) -> NormalizedRef:
        """Store ``record`` and return a reference carrying its key.

        The key is the first truthy value among ``record["record_id"]`` and
        ``record["message_id"]``, or "" when neither is present or truthy.
        A later write with the same key replaces the earlier record.
        """
        key = ""
        for field_name in ("record_id", "message_id"):
            candidate = record.get(field_name)
            if candidate:
                key = candidate
                break
        self._records[key] = record
        return NormalizedRef(record_id=key)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Oplog storage abstractions."""
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class OplogEntry:
    """Immutable description of one operation to append to an OplogStore."""

    # Identifier of this operation.
    op_id: str
    # Identifier of the dataset the operation belongs to.
    dataset_id: str
    # Operation kind, as a string.
    op_type: str
    # String-to-string payload; the dict itself stays mutable even though the entry is frozen.
    payload: Dict[str, str]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OplogStore:
    """Interface for an append-only operation log; subclasses implement ``append``."""

    def append(self, entry: OplogEntry) -> None:
        """Add ``entry`` to the log. Not implemented here."""
        raise NotImplementedError
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Projection storage abstractions."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Raw storage abstractions."""
|