topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
topos/api/local_mcp.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Local API for MCP-style tools (no Control Plane). Same auth as engine; for same-device/offline use."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, Body, Depends
|
|
8
|
+
|
|
9
|
+
from ..auth import require_api_key
|
|
10
|
+
from ..core.handlers import handle_control_plane_request
|
|
11
|
+
|
|
12
|
+
router = APIRouter(prefix="/api/local", tags=["local-mcp"])
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _local_mcp_payload(extra: dict | None = None) -> dict:
|
|
16
|
+
"""Payload for local MCP requests; source=claude_desktop so engine counts per source."""
|
|
17
|
+
p = {"mcp_source": "claude_desktop"}
|
|
18
|
+
if extra:
|
|
19
|
+
p.update(extra)
|
|
20
|
+
return p
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@router.post("/list_database_tables")
async def local_list_database_tables(_: None = Depends(require_api_key)) -> dict:  # noqa: B008
    """List tables (same as CP-forwarded tool). Requires Bearer TOPOS_KEY.

    Builds a ``list_database_tables`` message with a fresh UUID and hands it
    to ``handle_control_plane_request``. Returns the handler's ``payload``
    (``{}`` when absent), or an error dict when the handler reports
    ``status == "error"``.
    """
    request = {
        "id": str(uuid.uuid4()),
        "type": "list_database_tables",
        "payload": _local_mcp_payload(),
    }
    response = await handle_control_plane_request(request)
    if response.get("status") != "error":
        return response.get("payload", {})
    return {"status": "error", "error": response.get("error", "unknown")}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@router.post("/get_table_schema")
async def local_get_table_schema(
    body: dict = Body(default_factory=dict),
    _: None = Depends(require_api_key),  # noqa: B008
) -> dict:
    """Get table schema (same as CP-forwarded tool). Body: {"table_name": "..."}. Requires Bearer TOPOS_KEY.

    Builds a ``get_table_schema`` message with a fresh UUID and hands it to
    ``handle_control_plane_request``. Returns the handler's ``payload``
    (``{}`` when absent), or an error dict when ``table_name`` is missing,
    blank or not a string, or when the handler reports ``status == "error"``.
    """
    raw_name = body.get("table_name")
    # The JSON body can carry any type under "table_name" (number, list, ...).
    # Only a string can name a table; anything else is treated as missing
    # rather than failing with AttributeError on .strip().
    table_name = raw_name.strip() if isinstance(raw_name, str) else ""
    if not table_name:
        return {"status": "error", "error": "table_name required"}
    msg = {
        "id": str(uuid.uuid4()),
        "type": "get_table_schema",
        "payload": _local_mcp_payload({"table_name": table_name}),
    }
    out = await handle_control_plane_request(msg)
    if out.get("status") == "error":
        return {"status": "error", "error": out.get("error", "unknown")}
    return out.get("payload", {})
|
topos/api/messenger_analytics.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional, Sequence
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends, Query
|
|
6
|
+
|
|
7
|
+
from ..analytics.messenger_communities import (
|
|
8
|
+
MESSENGER_COMMUNITIES_TABLE,
|
|
9
|
+
MESSENGER_PARTICIPANT_IMPORTANCE_TABLE,
|
|
10
|
+
MESSENGER_SOCIAL_EDGES_TABLE,
|
|
11
|
+
compute_and_persist_messenger_analytics,
|
|
12
|
+
ensure_messenger_analytics_tables,
|
|
13
|
+
)
|
|
14
|
+
from ..analytics.messenger_graph import extract_messenger_graph
|
|
15
|
+
from ..analytics.messenger_labels import resolve_participant_labels
|
|
16
|
+
from ..auth import require_api_key
|
|
17
|
+
from ..core.state import get_db_connection
|
|
18
|
+
|
|
19
|
+
router = APIRouter()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _normalize_source_filter(
|
|
23
|
+
source_id: Optional[str],
|
|
24
|
+
source_ids: Optional[str],
|
|
25
|
+
) -> List[str]:
|
|
26
|
+
out: List[str] = []
|
|
27
|
+
if source_id and source_id.strip():
|
|
28
|
+
out.append(source_id.strip())
|
|
29
|
+
if source_ids:
|
|
30
|
+
for value in source_ids.split(","):
|
|
31
|
+
if value.strip():
|
|
32
|
+
out.append(value.strip())
|
|
33
|
+
return sorted(set(out))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _source_scope(source_filter: Sequence[str]) -> str:
|
|
37
|
+
if not source_filter:
|
|
38
|
+
return "all"
|
|
39
|
+
return ",".join(sorted(set(source_filter)))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _rows_to_dicts(rows: Sequence[Any]) -> List[Dict[str, Any]]:
|
|
43
|
+
out: List[Dict[str, Any]] = []
|
|
44
|
+
for row in rows:
|
|
45
|
+
if hasattr(row, "keys"):
|
|
46
|
+
out.append({k: row[k] for k in row.keys()})
|
|
47
|
+
else:
|
|
48
|
+
out.append(dict(row))
|
|
49
|
+
return out
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@router.get("/messenger-analytics/recompute", dependencies=[Depends(require_api_key)])
async def recompute_messenger_analytics_get_alias() -> Dict[str, Any]:
    """Answer GET on the recompute route with a hint to use POST instead.

    Performs no computation; it only returns an error payload.
    """
    hint = "Use POST /v1/messenger-analytics/recompute"
    return {"status": "error", "error": hint}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@router.post("/messenger-analytics/recompute", dependencies=[Depends(require_api_key)])
async def recompute_messenger_analytics(
    dataset_id: str = Query(...),
    period_granularity: str = Query("month"),
    source_id: Optional[str] = Query(None),
    source_ids: Optional[str] = Query(None),
    start_ts: Optional[str] = Query(None),
    end_ts: Optional[str] = Query(None),
    cumulative: bool = Query(False),
) -> Dict[str, Any]:
    """Run ``compute_and_persist_messenger_analytics`` for a dataset.

    The ``source_id`` / ``source_ids`` parameters are merged into one source
    filter (passed as ``None`` when empty); all other query parameters are
    forwarded unchanged.

    Returns the computation result merged into ``{"status": "ok", ...}``,
    or an error dict when no database connection is available.
    """
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    selected_sources = _normalize_source_filter(source_id, source_ids)
    summary = compute_and_persist_messenger_analytics(
        dataset_id=dataset_id,
        conn=db,
        start_ts=start_ts,
        end_ts=end_ts,
        source_ids=selected_sources if selected_sources else None,
        period_granularity=period_granularity,
        cumulative=cumulative,
    )
    response: Dict[str, Any] = {"status": "ok", **summary}
    return response
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _maybe_compute_if_missing(
|
|
85
|
+
*,
|
|
86
|
+
conn: Any,
|
|
87
|
+
dataset_id: str,
|
|
88
|
+
period_key: str,
|
|
89
|
+
source_filter: Sequence[str],
|
|
90
|
+
ensure_data: bool,
|
|
91
|
+
) -> None:
|
|
92
|
+
if not ensure_data:
|
|
93
|
+
return
|
|
94
|
+
source_scope = _source_scope(source_filter)
|
|
95
|
+
row = conn.execute(
|
|
96
|
+
f"""
|
|
97
|
+
SELECT 1
|
|
98
|
+
FROM {MESSENGER_PARTICIPANT_IMPORTANCE_TABLE}
|
|
99
|
+
WHERE dataset_id = ? AND period_key = ? AND source_scope = ?
|
|
100
|
+
LIMIT 1
|
|
101
|
+
""",
|
|
102
|
+
(dataset_id, period_key, source_scope),
|
|
103
|
+
).fetchone()
|
|
104
|
+
if row:
|
|
105
|
+
return
|
|
106
|
+
compute_and_persist_messenger_analytics(
|
|
107
|
+
dataset_id=dataset_id,
|
|
108
|
+
conn=conn,
|
|
109
|
+
source_ids=source_filter or None,
|
|
110
|
+
period_granularity="month",
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _decode_edge_type_counts(counts_raw: Any) -> Any:
    """Decode a stored ``edge_type_counts_json`` value; ``{}`` when empty or undecodable."""
    import json  # function-scope import: this module has no file-level json import

    if not counts_raw:
        return {}
    try:
        return json.loads(counts_raw)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
        return {}


@router.get("/messenger-analytics/graph", dependencies=[Depends(require_api_key)])
async def get_messenger_graph(
    dataset_id: str = Query(...),
    period: str = Query(...),
    source_id: Optional[str] = Query(None),
    source_ids: Optional[str] = Query(None),
    ensure_data: bool = Query(True),
) -> Dict[str, Any]:
    """Return the stored messenger graph (nodes and edges) for one period.

    Edges are read from the social-edges table. Nodes are read from the
    participant-importance table, left-joined with the communities table,
    and decorated with labels from ``resolve_participant_labels``. When
    *ensure_data* is true, ``_maybe_compute_if_missing`` runs first.

    Returns a dict with ``status``, ``dataset_id``, ``period``,
    ``source_scope``, ``nodes`` and ``edges``, or an error dict when no
    database connection is available.
    """
    conn = get_db_connection()
    if not conn:
        return {"status": "error", "error": "Database not available"}
    ensure_messenger_analytics_tables(conn)
    source_filter = _normalize_source_filter(source_id, source_ids)
    _maybe_compute_if_missing(
        conn=conn,
        dataset_id=dataset_id,
        period_key=period,
        source_filter=source_filter,
        ensure_data=ensure_data,
    )
    source_scope = _source_scope(source_filter)
    rows = _rows_to_dicts(
        conn.execute(
            f"""
            SELECT source_id, target_id, weight, edge_type, edge_type_counts_json
            FROM {MESSENGER_SOCIAL_EDGES_TABLE}
            WHERE dataset_id = ? AND period_key = ? AND source_scope = ?
            ORDER BY source_id, target_id
            """,
            (dataset_id, period, source_scope),
        ).fetchall()
    )
    nodes = _rows_to_dicts(
        conn.execute(
            f"""
            SELECT i.participant_id, i.centrality_degree, c.community_id
            FROM {MESSENGER_PARTICIPANT_IMPORTANCE_TABLE} i
            LEFT JOIN {MESSENGER_COMMUNITIES_TABLE} c
            ON c.dataset_id = i.dataset_id
            AND c.period_key = i.period_key
            AND c.source_scope = i.source_scope
            AND c.participant_id = i.participant_id
            WHERE i.dataset_id = ? AND i.period_key = ? AND i.source_scope = ?
            ORDER BY i.centrality_degree DESC, i.participant_id
            """,
            (dataset_id, period, source_scope),
        ).fetchall()
    )
    labels_by_participant = resolve_participant_labels(
        conn,
        dataset_id=dataset_id,
        participant_ids=[str(row["participant_id"]) for row in nodes if row.get("participant_id")],
    )
    graph_nodes = []
    for row in nodes:
        participant_id = row["participant_id"]
        # Look the label record up once per node instead of once per field.
        labels = labels_by_participant.get(str(participant_id), {})
        graph_nodes.append(
            {
                "id": participant_id,
                "label": labels.get("label", participant_id),
                "display_name": labels.get("display_name"),
                "identifier": labels.get("identifier"),
                "importance": float(row.get("centrality_degree") or 0.0),
                "community_id": row.get("community_id"),
            }
        )
    graph_edges = [
        {
            "source": row["source_id"],
            "target": row["target_id"],
            "weight": float(row.get("weight") or 0.0),
            "edge_type": row.get("edge_type"),
            "edge_type_counts": _decode_edge_type_counts(row.get("edge_type_counts_json")),
        }
        for row in rows
    ]
    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "period": period,
        "source_scope": source_scope,
        "nodes": graph_nodes,
        "edges": graph_edges,
    }
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@router.get("/messenger-analytics/importance", dependencies=[Depends(require_api_key)])
async def get_messenger_importance(
    dataset_id: str = Query(...),
    period: str = Query(...),
    source_id: Optional[str] = Query(None),
    source_ids: Optional[str] = Query(None),
    ensure_data: bool = Query(True),
) -> Dict[str, Any]:
    """Return stored participant-importance rows for one period, with labels.

    Rows come from the participant-importance table, ordered by degree and
    then betweenness centrality (both descending). Each row is extended with
    ``participant_label``, ``participant_display_name`` and
    ``participant_identifier`` from ``resolve_participant_labels``. When
    *ensure_data* is true, ``_maybe_compute_if_missing`` runs first.
    """
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    ensure_messenger_analytics_tables(db)
    selected_sources = _normalize_source_filter(source_id, source_ids)
    _maybe_compute_if_missing(
        conn=db,
        dataset_id=dataset_id,
        period_key=period,
        source_filter=selected_sources,
        ensure_data=ensure_data,
    )
    scope = _source_scope(selected_sources)
    cursor = db.execute(
        f"""
        SELECT participant_id, centrality_degree, centrality_betweenness
        FROM {MESSENGER_PARTICIPANT_IMPORTANCE_TABLE}
        WHERE dataset_id = ? AND period_key = ? AND source_scope = ?
        ORDER BY centrality_degree DESC, centrality_betweenness DESC
        """,
        (dataset_id, period, scope),
    )
    records = _rows_to_dicts(cursor.fetchall())
    known_ids = [str(record["participant_id"]) for record in records if record.get("participant_id")]
    label_lookup = resolve_participant_labels(
        db,
        dataset_id=dataset_id,
        participant_ids=known_ids,
    )

    def _with_labels(record: Dict[str, Any]) -> Dict[str, Any]:
        pid = str(record["participant_id"])
        info = label_lookup.get(pid, {})
        return {
            **record,
            "participant_label": info.get("label", pid),
            "participant_display_name": info.get("display_name"),
            "participant_identifier": info.get("identifier"),
        }

    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "period": period,
        "source_scope": scope,
        "importance": [_with_labels(record) for record in records],
    }
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
@router.get("/messenger-analytics/communities", dependencies=[Depends(require_api_key)])
async def get_messenger_communities(
    dataset_id: str = Query(...),
    period: str = Query(...),
    source_id: Optional[str] = Query(None),
    source_ids: Optional[str] = Query(None),
    ensure_data: bool = Query(True),
) -> Dict[str, Any]:
    """Return stored community membership for one period, grouped by community.

    Rows are read from the communities table and grouped by integer
    ``community_id``. Each community lists its participant ids plus a
    ``participants_labeled`` list with labels from
    ``resolve_participant_labels``. When *ensure_data* is true,
    ``_maybe_compute_if_missing`` runs first.
    """
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    ensure_messenger_analytics_tables(db)
    selected_sources = _normalize_source_filter(source_id, source_ids)
    _maybe_compute_if_missing(
        conn=db,
        dataset_id=dataset_id,
        period_key=period,
        source_filter=selected_sources,
        ensure_data=ensure_data,
    )
    scope = _source_scope(selected_sources)
    cursor = db.execute(
        f"""
        SELECT participant_id, community_id
        FROM {MESSENGER_COMMUNITIES_TABLE}
        WHERE dataset_id = ? AND period_key = ? AND source_scope = ?
        ORDER BY community_id, participant_id
        """,
        (dataset_id, period, scope),
    )
    memberships = _rows_to_dicts(cursor.fetchall())
    label_lookup = resolve_participant_labels(
        db,
        dataset_id=dataset_id,
        participant_ids=[str(m["participant_id"]) for m in memberships if m.get("participant_id")],
    )

    # Bucket participants by community, keeping the query's row order.
    members_by_community: Dict[int, List[str]] = {}
    for membership in memberships:
        key = int(membership["community_id"])
        if key not in members_by_community:
            members_by_community[key] = []
        members_by_community[key].append(membership["participant_id"])

    communities = []
    for key in sorted(members_by_community):
        members = members_by_community[key]
        labeled = [
            {"id": member, "label": label_lookup.get(str(member), {}).get("label", member)}
            for member in members
        ]
        communities.append(
            {
                "community_id": key,
                "participants": members,
                "participants_labeled": labeled,
            }
        )

    return {
        "status": "ok",
        "dataset_id": dataset_id,
        "period": period,
        "source_scope": scope,
        "communities": communities,
    }
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@router.get("/messenger-analytics/periods", dependencies=[Depends(require_api_key)])
async def get_messenger_periods(
    dataset_id: str = Query(...),
    source_id: Optional[str] = Query(None),
    source_ids: Optional[str] = Query(None),
    include_empty: bool = Query(False),
) -> Dict[str, Any]:
    """List the period keys available for a dataset and source scope.

    Period keys are read from the participant-importance table. When none
    are stored and *include_empty* is false, the periods reported by
    ``extract_messenger_graph`` (month granularity) are returned instead.
    """
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    ensure_messenger_analytics_tables(db)
    selected_sources = _normalize_source_filter(source_id, source_ids)
    scope = _source_scope(selected_sources)
    cursor = db.execute(
        f"""
        SELECT DISTINCT period_key
        FROM {MESSENGER_PARTICIPANT_IMPORTANCE_TABLE}
        WHERE dataset_id = ? AND source_scope = ?
        ORDER BY period_key
        """,
        (dataset_id, scope),
    )
    period_keys = [record["period_key"] for record in _rows_to_dicts(cursor.fetchall())]

    if not period_keys and not include_empty:
        # Nothing stored yet: fall back to what the graph extraction reports.
        extracted = extract_messenger_graph(
            dataset_id=dataset_id,
            conn=db,
            source_ids=selected_sources or None,
            period_granularity="month",
        )
        period_keys = [entry["period_key"] for entry in extracted.get("periods", [])]

    return {"status": "ok", "dataset_id": dataset_id, "source_scope": scope, "periods": period_keys}
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@router.get("/messenger-analytics/sources", dependencies=[Depends(require_api_key)])
async def get_messenger_sources(dataset_id: str = Query(...)) -> Dict[str, Any]:
    """List the distinct ``source_id`` values in ``conversation_messages`` for a dataset.

    Returns a dict with ``status``, ``dataset_id`` and the sorted
    ``sources`` list, or an error dict when no database connection is
    available.
    """
    db = get_db_connection()
    if not db:
        return {"status": "error", "error": "Database not available"}

    cursor = db.execute(
        """
        SELECT DISTINCT source_id
        FROM conversation_messages
        WHERE dataset_id = ?
        ORDER BY source_id
        """,
        (dataset_id,),
    )
    found = _rows_to_dicts(cursor.fetchall())
    source_list = [record["source_id"] for record in found]
    return {"status": "ok", "dataset_id": dataset_id, "sources": source_list}
|
topos/api/query_api.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter
|
|
4
|
+
|
|
5
|
+
from ..analytics.duckdb_adapter import DuckDBAdapter
|
|
6
|
+
|
|
7
|
+
router = APIRouter()
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@router.post("/query")
async def run_query() -> dict:
    """Placeholder query endpoint; always answers ``{"status": "stub"}``."""
    _ = DuckDBAdapter  # references the imported adapter; it is not called here
    stub_response = {"status": "stub"}
    return stub_response
|
topos/api/sanitization_ollama_config.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""Read/update device-local Ollama sanitization overrides (engine_config JSON)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Body, Depends, HTTPException
|
|
9
|
+
|
|
10
|
+
from ..auth import require_api_key
|
|
11
|
+
from ..config.sanitization_ollama import (
|
|
12
|
+
ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE,
|
|
13
|
+
effective_config_for_api,
|
|
14
|
+
normalize_put_device_overrides,
|
|
15
|
+
)
|
|
16
|
+
from ..config.settings import settings
|
|
17
|
+
from ..core.state import get_db_connection, set_engine_config_value
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger("topos.api.sanitization_ollama_config")
|
|
20
|
+
|
|
21
|
+
router = APIRouter(tags=["sanitization-ollama"])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@router.get("/v1/sanitization-ollama-config", dependencies=[Depends(require_api_key)])
async def get_sanitization_ollama_config() -> dict[str, Any]:
    """Return the effective Ollama sanitization config.

    The result of ``effective_config_for_api`` is merged into
    ``{"status": "ok", ...}``.

    Raises:
        HTTPException: 503 when no database connection is available,
            500 when reading the config fails.
    """
    db = get_db_connection()
    if not db:
        raise HTTPException(status_code=503, detail="Database not available")
    try:
        response = {"status": "ok", **effective_config_for_api(settings, db)}
    except Exception as exc:  # noqa: BLE001
        logger.warning("get_sanitization_ollama_config failed: %s", exc)
        raise HTTPException(status_code=500, detail="Failed to read config") from exc
    return response
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@router.put("/v1/sanitization-ollama-config", dependencies=[Depends(require_api_key)])
async def put_sanitization_ollama_config(body: dict[str, Any] = Body(default=None)) -> dict[str, Any]:
    """Store device-level Ollama sanitization overrides and return the effective config.

    The request body (an empty dict when absent) is normalized by
    ``normalize_put_device_overrides`` and saved under the device-override
    engine-config key.

    Raises:
        HTTPException: 503 when no database connection is available,
            400 when a ``ValueError`` is raised while normalizing or saving,
            500 on any other failure.
    """
    db = get_db_connection()
    if not db:
        raise HTTPException(status_code=503, detail="Database not available")
    try:
        overrides = body if body else {}
        serialized = normalize_put_device_overrides(overrides)
        set_engine_config_value(db, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE, serialized)
        response = {"status": "ok", **effective_config_for_api(settings, db)}
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:  # noqa: BLE001
        logger.warning("put_sanitization_ollama_config failed: %s", exc)
        raise HTTPException(status_code=500, detail="Failed to save config") from exc
    return response
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@router.delete("/v1/sanitization-ollama-config", dependencies=[Depends(require_api_key)])
async def delete_sanitization_ollama_device_overrides() -> dict[str, Any]:
    """Clear device overrides; effective config reverts to Settings / env only.

    The device-override engine-config key is overwritten with ``"{}"`` and
    the resulting effective config is returned.

    Raises:
        HTTPException: 503 when no database connection is available,
            500 when clearing or re-reading the config fails.
    """
    db = get_db_connection()
    if not db:
        raise HTTPException(status_code=503, detail="Database not available")
    try:
        set_engine_config_value(db, ENGINE_CONFIG_KEY_SANITIZATION_OLLAMA_DEVICE, "{}")
        response = {"status": "ok", **effective_config_for_api(settings, db)}
    except Exception as exc:  # noqa: BLE001
        logger.warning("delete sanitization_ollama overrides failed: %s", exc)
        raise HTTPException(status_code=500, detail="Failed to clear config") from exc
    return response
|