topos-node 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shared/__init__.py +59 -0
- shared/filtering.py +640 -0
- shared/schema_registry.py +229 -0
- topos/__init__.py +5 -0
- topos/__version__.py +6 -0
- topos/analytics/__init__.py +15 -0
- topos/analytics/duckdb_adapter.py +48 -0
- topos/analytics/messenger_communities.py +349 -0
- topos/analytics/messenger_graph.py +522 -0
- topos/analytics/messenger_labels.py +321 -0
- topos/analytics/profiles.py +22 -0
- topos/analytics/query_engine.py +64 -0
- topos/analytics/raw_queries.py +174 -0
- topos/api/__init__.py +1 -0
- topos/api/analytics.py +52 -0
- topos/api/app_registry.py +31 -0
- topos/api/backup.py +15 -0
- topos/api/compute_remote.py +175 -0
- topos/api/data_commit.py +158 -0
- topos/api/data_explorer_table_prefs.py +81 -0
- topos/api/db.py +10 -0
- topos/api/device.py +25 -0
- topos/api/enrichment.py +959 -0
- topos/api/filter_lab.py +195 -0
- topos/api/health.py +61 -0
- topos/api/ingestion_api.py +37 -0
- topos/api/ingestion_compat.py +21 -0
- topos/api/ingestion_sources.py +600 -0
- topos/api/llm.py +76 -0
- topos/api/local_mcp.py +46 -0
- topos/api/messenger_analytics.py +385 -0
- topos/api/query_api.py +13 -0
- topos/api/sanitization_ollama_config.py +64 -0
- topos/api/source_install.py +324 -0
- topos/api/sources.py +13 -0
- topos/api/sync.py +10 -0
- topos/api/ui_config.py +83 -0
- topos/api/uma_data.py +311 -0
- topos/api/usage.py +49 -0
- topos/api/user_identity.py +46 -0
- topos/app.py +239 -0
- topos/auth.py +17 -0
- topos/canonicalization/__init__.py +1 -0
- topos/canonicalization/mappers/__init__.py +22 -0
- topos/canonicalization/mappers/base.py +26 -0
- topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
- topos/canonicalization/mappers/grok_mapper.py +17 -0
- topos/canonicalization/mappers/messenger_mapper.py +58 -0
- topos/canonicalization/models.py +31 -0
- topos/canonicalization/resolver.py +23 -0
- topos/cli/__init__.py +1 -0
- topos/cli/__main__.py +6 -0
- topos/cli/commands.py +132 -0
- topos/config/__init__.py +1 -0
- topos/config/sanitization_ollama.py +189 -0
- topos/config/settings.py +310 -0
- topos/contacts/__init__.py +5 -0
- topos/contacts/identity.py +24 -0
- topos/control_plane_client.py +300 -0
- topos/core/__init__.py +1 -0
- topos/core/api_models.py +128 -0
- topos/core/connection_resilience.py +99 -0
- topos/core/device_helpers.py +8 -0
- topos/core/errors.py +13 -0
- topos/core/events.py +12 -0
- topos/core/handlers.py +5625 -0
- topos/core/logging.py +175 -0
- topos/core/metrics.py +21 -0
- topos/core/startup_banner.py +62 -0
- topos/core/state.py +682 -0
- topos/core/table_layers.py +45 -0
- topos/core/types.py +13 -0
- topos/data_explorer_table_prefs.py +150 -0
- topos/engine/__init__.py +29 -0
- topos/engine/backends/__init__.py +50 -0
- topos/engine/backends/base.py +21 -0
- topos/engine/backends/huggingface.py +151 -0
- topos/engine/backends/ollama.py +181 -0
- topos/engine/backends/stub.py +22 -0
- topos/engine/engine.py +165 -0
- topos/engine/intake.py +32 -0
- topos/engine/queue_manager.py +112 -0
- topos/engine/registration.py +126 -0
- topos/engine/result_formatter.py +38 -0
- topos/engine/router.py +19 -0
- topos/engine/scoped_token.py +82 -0
- topos/engine/tasks.py +154 -0
- topos/engine/transport.py +44 -0
- topos/engine/usage_guard.py +100 -0
- topos/engine/usage_observation.py +129 -0
- topos/engine/validator.py +23 -0
- topos/enrichment/__init__.py +1 -0
- topos/enrichment/derived_tables.py +214 -0
- topos/enrichment/jobs/__init__.py +30 -0
- topos/enrichment/jobs/base.py +54 -0
- topos/enrichment/jobs/canonical/__init__.py +1 -0
- topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
- topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
- topos/enrichment/jobs/canonical/entities_job.py +27 -0
- topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
- topos/enrichment/jobs/canonical/topics_job.py +27 -0
- topos/enrichment/jobs/raw/__init__.py +1 -0
- topos/enrichment/jobs/raw/attachments_job.py +12 -0
- topos/enrichment/jobs/raw/language_job.py +12 -0
- topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
- topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
- topos/enrichment/models/__init__.py +1 -0
- topos/enrichment/models/manager.py +8 -0
- topos/enrichment/models/registry.py +71 -0
- topos/enrichment/models/versioning.py +8 -0
- topos/enrichment/orchestrator.py +177 -0
- topos/enrichment/processor.py +17 -0
- topos/enrichment/progress_bar.py +122 -0
- topos/enrichment/website_classifier.py +31 -0
- topos/filter_lab/__init__.py +1 -0
- topos/filter_lab/bundles.py +300 -0
- topos/filter_lab/schema.py +86 -0
- topos/filter_lab/service.py +167 -0
- topos/filter_lab/store.py +374 -0
- topos/filter_lab/worker.py +250 -0
- topos/hosted_pool_lease.py +153 -0
- topos/ingestion/__init__.py +1 -0
- topos/ingestion/checkpoints/__init__.py +6 -0
- topos/ingestion/checkpoints/checkpoint_store.py +24 -0
- topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
- topos/ingestion/ingest_helpers.py +504 -0
- topos/ingestion/jobs.py +91 -0
- topos/ingestion/local_sync.py +823 -0
- topos/ingestion/log_preview.py +21 -0
- topos/ingestion/manager.py +1100 -0
- topos/ingestion/parser.py +174 -0
- topos/ingestion/parsers/__init__.py +32 -0
- topos/ingestion/parsers/base.py +24 -0
- topos/ingestion/parsers/browser_parser.py +171 -0
- topos/ingestion/parsers/calendar_parser.py +21 -0
- topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
- topos/ingestion/parsers/chatgpt_parser.py +67 -0
- topos/ingestion/parsers/grok_parser.py +21 -0
- topos/ingestion/parsers/messenger_parser.py +97 -0
- topos/ingestion/progress.py +54 -0
- topos/ingestion/sources/__init__.py +20 -0
- topos/ingestion/sources/base.py +39 -0
- topos/ingestion/sources/calendar.py +29 -0
- topos/ingestion/sources/chatgpt.py +29 -0
- topos/ingestion/sources/contact_importers.py +274 -0
- topos/ingestion/sources/grok.py +29 -0
- topos/ingestion/sources/imessage_reader.py +479 -0
- topos/ingestion/sources/signal_export_parser.py +132 -0
- topos/ingestion/sources/signal_reader.py +491 -0
- topos/ingestion/state_machine.py +70 -0
- topos/ingestion/triggers/__init__.py +1 -0
- topos/ingestion/triggers/file_trigger.py +36 -0
- topos/ingestion/triggers/sqlite_trigger.py +18 -0
- topos/ingestion/validation/__init__.py +1 -0
- topos/ingestion/validation/base.py +27 -0
- topos/ingestion/validation/schema_registry.py +111 -0
- topos/ingestion/validation/schema_validator.py +13 -0
- topos/lineage/__init__.py +1 -0
- topos/lineage/provenance.py +9 -0
- topos/lineage/tracker.py +9 -0
- topos/mcp_stdio_proxy.py +83 -0
- topos/observability/__init__.py +1 -0
- topos/observability/alerts.py +7 -0
- topos/observability/metrics.py +25 -0
- topos/observability/tracing.py +18 -0
- topos/openai_client.py +69 -0
- topos/projections/__init__.py +1 -0
- topos/projections/vector_index/__init__.py +1 -0
- topos/projections/vector_index/base.py +21 -0
- topos/projections/vector_index/builders.py +11 -0
- topos/projections/vector_index/health_checks.py +5 -0
- topos/rate_limit.py +43 -0
- topos/sanitization/__init__.py +16 -0
- topos/sanitization/ollama_transforms.py +276 -0
- topos/scope_resolution.py +89 -0
- topos/services/__init__.py +1 -0
- topos/services/container.py +46 -0
- topos/services/embeddings/__init__.py +1 -0
- topos/services/embeddings/base.py +7 -0
- topos/services/embeddings/local.py +9 -0
- topos/services/embeddings/remote.py +9 -0
- topos/services/interfaces.py +40 -0
- topos/services/llm/__init__.py +1 -0
- topos/services/llm/base.py +7 -0
- topos/services/llm/openai.py +126 -0
- topos/services/local.py +123 -0
- topos/services/postgres.py +385 -0
- topos/sources/__init__.py +6 -0
- topos/sources/definitions.py +114 -0
- topos/sources/install_service.py +836 -0
- topos/sources/registry.py +263 -0
- topos/sources/runtime_install.py +427 -0
- topos/storage/__init__.py +1 -0
- topos/storage/canonical/__init__.py +18 -0
- topos/storage/canonical/ai_chat/__init__.py +22 -0
- topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
- topos/storage/canonical/ai_chat/mapper.py +168 -0
- topos/storage/canonical/ai_chat/model.py +87 -0
- topos/storage/canonical/ai_chat/tables.py +179 -0
- topos/storage/canonical/canonical_store.py +24 -0
- topos/storage/canonical/conversations_tables.py +1020 -0
- topos/storage/canonical/mapping_store.py +30 -0
- topos/storage/canonical/postgres.py +10 -0
- topos/storage/db/__init__.py +1 -0
- topos/storage/db/client.py +8 -0
- topos/storage/db/migrations/__init__.py +1 -0
- topos/storage/db/migrations/stage9_column_renames.py +78 -0
- topos/storage/db/paths.py +122 -0
- topos/storage/db/postgres.py +240 -0
- topos/storage/db/schema.py +6 -0
- topos/storage/enrichment/__init__.py +1 -0
- topos/storage/enrichment/canonical_enrichment_store.py +7 -0
- topos/storage/enrichment/raw_enrichment_store.py +18 -0
- topos/storage/normalized/__init__.py +1 -0
- topos/storage/normalized/normalized_store.py +24 -0
- topos/storage/oplog/__init__.py +1 -0
- topos/storage/oplog/decision.py +6 -0
- topos/storage/oplog/oplog_store.py +17 -0
- topos/storage/oplog/postgres.py +10 -0
- topos/storage/projections/__init__.py +1 -0
- topos/storage/projections/index_ops_store.py +6 -0
- topos/storage/projections/vector_index_store.py +6 -0
- topos/storage/raw/__init__.py +1 -0
- topos/storage/raw/browser_flat_tables.py +303 -0
- topos/storage/raw/file_store.py +100 -0
- topos/storage/raw/raw_store.py +29 -0
- topos/storage/raw/raw_tables_manager.py +295 -0
- topos/storage/raw/sqlite_raw_store.py +17 -0
- topos/storage/security/encryption.py +21 -0
- topos/storage/signal_identity.py +71 -0
- topos/storage/source_settings.py +116 -0
- topos/storage/user_identity.py +69 -0
- topos/sync/__init__.py +5 -0
- topos/sync/client.py +272 -0
- topos/sync_handlers.py +70 -0
- topos/testing/__init__.py +1 -0
- topos/testing/lifespan.py +7 -0
- topos/uma_contact_enrichment.py +1032 -0
- topos/uma_filters.py +669 -0
- topos/uma_resource_id.py +24 -0
- topos/uma_rpt.py +69 -0
- topos/utils/base_object.py +61 -0
- topos/websocket_client.py +21 -0
- topos_node-0.1.0.dist-info/METADATA +199 -0
- topos_node-0.1.0.dist-info/RECORD +249 -0
- topos_node-0.1.0.dist-info/WHEEL +5 -0
- topos_node-0.1.0.dist-info/entry_points.txt +2 -0
- topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
- topos_node-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Minimal schema registry for ingestion validation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, Dict, Optional
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger("topos.ingestion.schema_registry")

# Every schema registered below describes the same flat message record, so the
# shared field specs are declared once and copied into each entry.
_MESSAGE_REQUIRED_FIELDS = ("id", "thread_id", "role", "content", "created_at")
_MESSAGE_FIELD_TYPES: Dict[str, Any] = {
    "id": str,
    "thread_id": str,
    "role": str,
    "content": str,
    "created_at": (int, float, str),
}


def _message_schema(
    name: str,
    version: str,
    description: str,
    file_format: str,
    **extra: Any,
) -> Dict[str, Any]:
    """Build one registry entry, giving it its own copies of the shared specs.

    Keys in ``extra`` are appended after the standard keys, in the order given.
    """
    entry: Dict[str, Any] = {
        "name": name,
        "version": version,
        "required_fields": list(_MESSAGE_REQUIRED_FIELDS),
        "field_types": dict(_MESSAGE_FIELD_TYPES),
        "description": description,
        "file_format": file_format,
    }
    entry.update(extra)
    return entry


SCHEMAS: Dict[str, Dict[str, Any]] = {
    "chatgpt.conversation.v1": _message_schema(
        name="ChatGPT Conversation Export",
        version="1",
        description="ChatGPT conversation export format (JSONL - flat records)",
        file_format="jsonl",
    ),
    "chatgpt.conversation.v2": _message_schema(
        name="ChatGPT Real Export Format",
        version="2",
        description="ChatGPT real export format (JSON array of conversation objects, flattened to v1 format)",
        file_format="json",
        source_structure="conversation_array",
        note="Flattened records match v1 format, but source is nested conversation objects",
    ),
    # Sprint 02: Messenger ingestion (same logical shape as chat for conversation_messages)
    "imessage.messages.v1": _message_schema(
        name="iMessage Messages",
        version="1",
        description="iMessage message format (normalized from chat.db or sync); id may be imessage:ROWID",
        file_format="jsonl",
    ),
    "signal.messages.v1": _message_schema(
        name="Signal Messages",
        version="1",
        description="Signal Desktop message format (normalized from SQLCipher or export); id may be signal:uuid",
        file_format="jsonl",
    ),
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_schema_definition(schema_id: str) -> Optional[Dict[str, Any]]:
    """Look up the schema registered under ``schema_id``.

    Returns the entry stored in ``SCHEMAS``, or ``None`` when nothing is
    registered under that id.
    """
    try:
        return SCHEMAS[schema_id]
    except KeyError:
        return None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def validate_schema(record: Dict[str, Any], schema_id: str) -> tuple[bool, Optional[str]]:
    """Check ``record`` against the schema registered under ``schema_id``.

    Required fields are checked for presence first, then every field listed in
    the schema's ``field_types`` that is present in ``record`` is type-checked.
    Validation stops at the first failure.

    Args:
        record: The record to validate.
        schema_id: Key of the schema in the registry.

    Returns:
        ``(True, None)`` when the record passes, otherwise ``(False, message)``
        where ``message`` describes the first problem found.
    """
    schema = get_schema_definition(schema_id)
    # Compare with None instead of relying on truthiness: a schema registered
    # as an empty dict imposes no constraints and must not be reported as
    # unknown.
    if schema is None:
        return False, f"Unknown schema: {schema_id}"

    for field in schema.get("required_fields", []):
        if field not in record:
            return False, f"Missing required field: {field}"

    for field, expected_type in schema.get("field_types", {}).items():
        # Absent fields are only an error when required (handled above); a
        # None type means the field is left unchecked.
        if expected_type is None or field not in record:
            continue
        value = record[field]
        # isinstance accepts a single type or a tuple of alternatives.
        if isinstance(value, expected_type):
            continue
        if isinstance(expected_type, tuple):
            expected = f"one of {expected_type}"
        else:
            expected = expected_type.__name__
        return False, (
            f"Field '{field}' has invalid type. "
            f"Expected {expected}, got {type(value).__name__}"
        )

    return True, None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def register_schema(schema_id: str, schema_def: Dict[str, Any]) -> None:
    """Store ``schema_def`` in the registry under ``schema_id``.

    Any entry already present under the same id is replaced. The registration
    is logged at INFO level.
    """
    SCHEMAS.update({schema_id: schema_def})
    logger.info("Registered schema: %s", schema_id)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Default schema validator (no-op placeholder)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Optional
|
|
6
|
+
|
|
7
|
+
from .base import SchemaDefinition, SchemaValidator, ValidationResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NoOpSchemaValidator(SchemaValidator):
    """Validator that accepts every record without inspecting it."""

    def validate(
        self,
        record: Dict[str, Any],
        schema: Optional[SchemaDefinition] = None,
    ) -> ValidationResult:
        """Return a passing result with no errors and empty metadata."""
        del record, schema  # deliberately unused
        return ValidationResult(is_valid=True, errors=[], metadata={})
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Lineage and provenance tracking for Topos."""
|
topos/lineage/tracker.py
ADDED
topos/mcp_stdio_proxy.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stdio MCP proxy: Claude Desktop → local Topos engine (no Control Plane).
|
|
3
|
+
|
|
4
|
+
Run this so Claude talks MCP over stdio to the proxy; the proxy forwards tool calls
|
|
5
|
+
to the engine's /api/local/* HTTP endpoints. Use when the engine and Claude run on
|
|
6
|
+
the same machine.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
ENGINE_URL=http://localhost:9000 BEARER_TOKEN=your_key python -m topos.mcp_stdio_proxy
|
|
10
|
+
# or with args:
|
|
11
|
+
python -m topos.mcp_stdio_proxy --url http://localhost:9000
|
|
12
|
+
|
|
13
|
+
Claude Desktop config (direct to local engine): use scripts/run_local_mcp_proxy.sh
|
|
14
|
+
with full path, args ["--url", "http://localhost:9000"], and env BEARER_TOKEN.
|
|
15
|
+
|
|
16
|
+
Only list_database_tables and get_table_schema are exposed (engine's /api/local/*).
|
|
17
|
+
For full tools (get_analytics, get_messages, get_oplog) use the Control Plane.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
import httpx
|
|
27
|
+
from mcp.server.fastmcp import FastMCP
|
|
28
|
+
|
|
29
|
+
# Engine URL and token; set in main() before FastMCP runs.
|
|
30
|
+
_engine_url: str = ""
|
|
31
|
+
_bearer_token: str = ""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _headers() -> dict[str, str]:
    """Build the HTTP headers sent with each engine request.

    The bearer token is read from the module-level ``_bearer_token`` at call
    time.
    """
    authorization = f"Bearer {_bearer_token}"
    return {"Authorization": authorization, "Content-Type": "application/json"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def _call_engine(path: str, json_body: dict | None = None) -> dict:
    """POST ``json_body`` to the engine at ``path`` and return the decoded JSON.

    A missing or empty ``json_body`` is sent as ``{}``. An HTTP error status is
    raised via ``raise_for_status()``; a dict reply whose ``"status"`` is
    ``"error"`` is raised as ``ValueError`` carrying the reply's ``"error"``
    value (``"engine error"`` when absent).
    """
    base = _engine_url.rstrip("/")
    payload = json_body or {}
    async with httpx.AsyncClient(timeout=30.0) as client:
        response = await client.post(f"{base}{path}", headers=_headers(), json=payload)
        response.raise_for_status()
        data = response.json()
        engine_failed = isinstance(data, dict) and data.get("status") == "error"
        if engine_failed:
            raise ValueError(data.get("error", "engine error"))
        return data
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def main() -> int:
    """Parse CLI options, register the proxied tools, and serve MCP over stdio.

    Sets the module-level ``_engine_url`` and ``_bearer_token`` used by the
    request helpers.

    Returns:
        1 (after printing an error to stderr) when ``BEARER_TOKEN`` is unset or
        blank; otherwise 0 once the stdio server call returns.
    """
    global _engine_url, _bearer_token

    default_url = os.environ.get("ENGINE_URL", "http://localhost:9000")
    cli = argparse.ArgumentParser(
        description="MCP stdio proxy to local Topos engine (/api/local/*). No Control Plane."
    )
    cli.add_argument(
        "--url",
        default=default_url,
        help="Engine base URL (default: ENGINE_URL or http://localhost:9000)",
    )
    options = cli.parse_args()

    _engine_url = options.url.rstrip("/")
    _bearer_token = os.environ.get("BEARER_TOKEN", "").strip()
    if _bearer_token == "":
        print("Error: BEARER_TOKEN env var required.", file=sys.stderr)
        return 1

    server = FastMCP("Topos (local engine)", port=0)

    # The nested function names and docstrings are passed to the MCP server by
    # the decorator, so they are kept exactly as published.
    @server.tool()
    async def list_database_tables() -> dict:
        """List all database tables grouped by layer with row counts."""
        return await _call_engine("/api/local/list_database_tables")

    @server.tool()
    async def get_table_schema(table_name: str) -> dict:
        """Get column info (schema) for a table. Use list_database_tables to see available tables."""
        return await _call_engine("/api/local/get_table_schema", {"table_name": table_name})

    server.run(transport="stdio")
    return 0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Observability utilities for Topos."""
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Observability metrics (Sprint 07). In-memory counters; extend for Prometheus later."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Dict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Running totals keyed by metric name; every access below holds _lock.
_counts: Dict[str, float] = {}
_lock = threading.Lock()


def record_metric(name: str, value: float) -> None:
    """Add ``value`` to the running total stored for ``name``."""
    with _lock:
        previous = _counts.get(name, 0)
        _counts[name] = previous + value


def get_metric(name: str) -> float:
    """Return the total recorded for ``name``, or ``0.0`` if none exists."""
    with _lock:
        try:
            return _counts[name]
        except KeyError:
            return 0.0


def reset_metrics() -> None:
    """Discard every recorded total."""
    with _lock:
        _counts.clear()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Tracing stubs for Topos."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Span:
    """Placeholder tracing span that only remembers its name.

    Usable as a context manager: entering yields the span itself and leaving
    does nothing, so an exception raised inside the block propagates.
    """

    def __init__(self, name: str):
        self.name = name

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Nothing is recorded yet; returning None lets exceptions propagate.
        del exc_type, exc, tb
        return None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def start_span(name: str) -> Span:
    """Create a new :class:`Span` labelled ``name``."""
    span = Span(name)
    return span
|
topos/openai_client.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Dict, Optional
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from .config.settings import settings
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)


class OpenAIError(Exception):
    """Raised when a call to the upstream OpenAI API fails."""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OpenAIClient:
    """Minimal OpenAI chat completions client."""

    def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None:
        """Store credentials and endpoint, falling back to ``settings`` values.

        Args:
            api_key: API key; ``settings.openai_api_key`` is used when falsy.
            base_url: API base URL; ``settings.openai_base_url`` is used when
                falsy.
        """
        self.api_key = api_key or settings.openai_api_key
        self.base_url = base_url or settings.openai_base_url
        self.timeout = settings.openai_timeout_seconds

    async def generate(
        self,
        prompt: str,
        max_tokens: Optional[int],
        temperature: Optional[float],
    ) -> Dict[str, Any]:
        """Send ``prompt`` as a single-turn chat completion request.

        Args:
            prompt: Text sent as the user message.
            max_tokens: Included in the request only when not ``None``.
            temperature: Included in the request only when not ``None``.

        Returns:
            ``{"output": <first choice's message content>, "usage": <usage>}``
            where usage is the reply's ``"usage"`` value (``{}`` when absent).

        Raises:
            OpenAIError: When the request cannot be sent, the reply status is
                429 (``"rate_limited"``) or any other status >= 400
                (``"upstream_error: ..."``), or the reply body is not JSON of
                the expected shape (``"invalid_response"``).
        """
        url = f"{self.base_url.rstrip('/')}/chat/completions"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        payload: Dict[str, Any] = {
            "model": settings.openai_model,
            "messages": [
                {"role": "system", "content": "You are a concise assistant."},
                {"role": "user", "content": prompt},
            ],
        }
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if temperature is not None:
            payload["temperature"] = temperature

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                resp = await client.post(url, headers=headers, json=payload)
        except Exception as exc:  # noqa: BLE001
            logger.error("OpenAI request failed: %s", exc)
            raise OpenAIError(f"request failed: {exc}") from exc

        if resp.status_code == 429:
            raise OpenAIError("rate_limited")

        if resp.status_code >= 400:
            # Prefer the structured error body; fall back to raw text when the
            # body is not JSON.
            try:
                detail = resp.json()
            except Exception:
                detail = resp.text
            raise OpenAIError(f"upstream_error: {resp.status_code}: {detail}")

        # Decode inside the try: a success status with a non-JSON or
        # unexpectedly shaped body must surface as OpenAIError like every other
        # upstream failure, not as a raw decode/lookup error.
        try:
            data = resp.json()
            message = data["choices"][0]["message"]["content"]
            usage = data.get("usage", {})
        except Exception as exc:  # noqa: BLE001
            raise OpenAIError("invalid_response") from exc

        return {"output": message, "usage": usage}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Projection builders for Topos."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Vector index projection abstractions."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Iterable, List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
class EmbeddingRow:
    """Immutable pairing of a record id with its embedding vector."""

    record_id: str
    vector: List[float]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class ProjectionStatus:
    """Immutable result of a projection build: a status label and a count."""

    status: str
    count: int
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ProjectionBuilder:
    """Base class for components that build a projection from embedding rows."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Build the projection; this base version always raises.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable
|
|
4
|
+
|
|
5
|
+
from .base import EmbeddingRow, ProjectionBuilder, ProjectionStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class VectorIndexBuilder(ProjectionBuilder):
    """Stub builder that only counts the embedding rows it is given."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Consume ``embeddings`` and report the row count with status "stub"."""
        total = 0
        for _row in embeddings:
            total += 1
        return ProjectionStatus(status="stub", count=total)
|
topos/rate_limit.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from fastapi import HTTPException, Request, status
|
|
7
|
+
|
|
8
|
+
from .config.settings import settings
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TokenBucket:
    """Token bucket that refills continuously at a per-minute rate.

    The bucket starts full, and its capacity equals ``rate_per_minute``.
    """

    def __init__(self, rate_per_minute: int) -> None:
        """Create a full bucket holding ``rate_per_minute`` tokens."""
        self.capacity = rate_per_minute
        self.tokens = rate_per_minute
        # Monotonic clock: the refill is computed from elapsed time, which must
        # not jump when the system wall clock is adjusted.
        self.refill_time = time.monotonic()
        self.rate_per_second = rate_per_minute / 60.0

    def consume(self, tokens: int = 1) -> bool:
        """Try to take ``tokens`` from the bucket.

        The bucket is first topped up for the time elapsed since the previous
        call, never exceeding its capacity.

        Returns:
            True when enough tokens were available (they are deducted), False
            otherwise (the balance is left as refilled).
        """
        now = time.monotonic()
        elapsed = now - self.refill_time
        refill = elapsed * self.rate_per_second
        if refill > 0:
            self.tokens = min(self.capacity, self.tokens + refill)
        self.refill_time = now
        if self.tokens >= tokens:
            self.tokens -= tokens
            return True
        return False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# One bucket per client IP, created the first time that IP is seen.
_buckets: Dict[str, TokenBucket] = {}


def rate_limit(request: Request) -> None:
    """Simple in-memory rate limit per client IP.

    Requests without client information share the ``"unknown"`` bucket.

    Raises:
        HTTPException: With status 429 when the caller's bucket is empty.
    """
    client = request.client
    ip = "unknown" if not client else client.host

    try:
        bucket = _buckets[ip]
    except KeyError:
        bucket = TokenBucket(settings.rate_limit_per_minute)
        _buckets[ip] = bucket

    if bucket.consume():
        return
    raise HTTPException(status_code=status.HTTP_429_TOO_MANY_REQUESTS, detail="Too many requests")
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Runtime text sanitization helpers (optional Ollama-backed field transforms)."""
|
|
2
|
+
|
|
3
|
+
from topos.config.sanitization_ollama import SANITIZATION_OLLAMA_TRANSFORM_IDS
|
|
4
|
+
|
|
5
|
+
from .ollama_transforms import (
|
|
6
|
+
OLLAMA_TRANSFORM_IDS,
|
|
7
|
+
apply_text_transform_with_ollama,
|
|
8
|
+
ollama_sanitization_enabled,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"SANITIZATION_OLLAMA_TRANSFORM_IDS",
|
|
13
|
+
"OLLAMA_TRANSFORM_IDS",
|
|
14
|
+
"apply_text_transform_with_ollama",
|
|
15
|
+
"ollama_sanitization_enabled",
|
|
16
|
+
]
|