PyPI - topos-node - Versions diffs - 0.1.0__py3-none-any.whl - Mend

topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

shared/__init__.py +59 -0
shared/filtering.py +640 -0
shared/schema_registry.py +229 -0
topos/__init__.py +5 -0
topos/__version__.py +6 -0
topos/analytics/__init__.py +15 -0
topos/analytics/duckdb_adapter.py +48 -0
topos/analytics/messenger_communities.py +349 -0
topos/analytics/messenger_graph.py +522 -0
topos/analytics/messenger_labels.py +321 -0
topos/analytics/profiles.py +22 -0
topos/analytics/query_engine.py +64 -0
topos/analytics/raw_queries.py +174 -0
topos/api/__init__.py +1 -0
topos/api/analytics.py +52 -0
topos/api/app_registry.py +31 -0
topos/api/backup.py +15 -0
topos/api/compute_remote.py +175 -0
topos/api/data_commit.py +158 -0
topos/api/data_explorer_table_prefs.py +81 -0
topos/api/db.py +10 -0
topos/api/device.py +25 -0
topos/api/enrichment.py +959 -0
topos/api/filter_lab.py +195 -0
topos/api/health.py +61 -0
topos/api/ingestion_api.py +37 -0
topos/api/ingestion_compat.py +21 -0
topos/api/ingestion_sources.py +600 -0
topos/api/llm.py +76 -0
topos/api/local_mcp.py +46 -0
topos/api/messenger_analytics.py +385 -0
topos/api/query_api.py +13 -0
topos/api/sanitization_ollama_config.py +64 -0
topos/api/source_install.py +324 -0
topos/api/sources.py +13 -0
topos/api/sync.py +10 -0
topos/api/ui_config.py +83 -0
topos/api/uma_data.py +311 -0
topos/api/usage.py +49 -0
topos/api/user_identity.py +46 -0
topos/app.py +239 -0
topos/auth.py +17 -0
topos/canonicalization/__init__.py +1 -0
topos/canonicalization/mappers/__init__.py +22 -0
topos/canonicalization/mappers/base.py +26 -0
topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
topos/canonicalization/mappers/grok_mapper.py +17 -0
topos/canonicalization/mappers/messenger_mapper.py +58 -0
topos/canonicalization/models.py +31 -0
topos/canonicalization/resolver.py +23 -0
topos/cli/__init__.py +1 -0
topos/cli/__main__.py +6 -0
topos/cli/commands.py +132 -0
topos/config/__init__.py +1 -0
topos/config/sanitization_ollama.py +189 -0
topos/config/settings.py +310 -0
topos/contacts/__init__.py +5 -0
topos/contacts/identity.py +24 -0
topos/control_plane_client.py +300 -0
topos/core/__init__.py +1 -0
topos/core/api_models.py +128 -0
topos/core/connection_resilience.py +99 -0
topos/core/device_helpers.py +8 -0
topos/core/errors.py +13 -0
topos/core/events.py +12 -0
topos/core/handlers.py +5625 -0
topos/core/logging.py +175 -0
topos/core/metrics.py +21 -0
topos/core/startup_banner.py +62 -0
topos/core/state.py +682 -0
topos/core/table_layers.py +45 -0
topos/core/types.py +13 -0
topos/data_explorer_table_prefs.py +150 -0
topos/engine/__init__.py +29 -0
topos/engine/backends/__init__.py +50 -0
topos/engine/backends/base.py +21 -0
topos/engine/backends/huggingface.py +151 -0
topos/engine/backends/ollama.py +181 -0
topos/engine/backends/stub.py +22 -0
topos/engine/engine.py +165 -0
topos/engine/intake.py +32 -0
topos/engine/queue_manager.py +112 -0
topos/engine/registration.py +126 -0
topos/engine/result_formatter.py +38 -0
topos/engine/router.py +19 -0
topos/engine/scoped_token.py +82 -0
topos/engine/tasks.py +154 -0
topos/engine/transport.py +44 -0
topos/engine/usage_guard.py +100 -0
topos/engine/usage_observation.py +129 -0
topos/engine/validator.py +23 -0
topos/enrichment/__init__.py +1 -0
topos/enrichment/derived_tables.py +214 -0
topos/enrichment/jobs/__init__.py +30 -0
topos/enrichment/jobs/base.py +54 -0
topos/enrichment/jobs/canonical/__init__.py +1 -0
topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
topos/enrichment/jobs/canonical/entities_job.py +27 -0
topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
topos/enrichment/jobs/canonical/topics_job.py +27 -0
topos/enrichment/jobs/raw/__init__.py +1 -0
topos/enrichment/jobs/raw/attachments_job.py +12 -0
topos/enrichment/jobs/raw/language_job.py +12 -0
topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
topos/enrichment/models/__init__.py +1 -0
topos/enrichment/models/manager.py +8 -0
topos/enrichment/models/registry.py +71 -0
topos/enrichment/models/versioning.py +8 -0
topos/enrichment/orchestrator.py +177 -0
topos/enrichment/processor.py +17 -0
topos/enrichment/progress_bar.py +122 -0
topos/enrichment/website_classifier.py +31 -0
topos/filter_lab/__init__.py +1 -0
topos/filter_lab/bundles.py +300 -0
topos/filter_lab/schema.py +86 -0
topos/filter_lab/service.py +167 -0
topos/filter_lab/store.py +374 -0
topos/filter_lab/worker.py +250 -0
topos/hosted_pool_lease.py +153 -0
topos/ingestion/__init__.py +1 -0
topos/ingestion/checkpoints/__init__.py +6 -0
topos/ingestion/checkpoints/checkpoint_store.py +24 -0
topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
topos/ingestion/ingest_helpers.py +504 -0
topos/ingestion/jobs.py +91 -0
topos/ingestion/local_sync.py +823 -0
topos/ingestion/log_preview.py +21 -0
topos/ingestion/manager.py +1100 -0
topos/ingestion/parser.py +174 -0
topos/ingestion/parsers/__init__.py +32 -0
topos/ingestion/parsers/base.py +24 -0
topos/ingestion/parsers/browser_parser.py +171 -0
topos/ingestion/parsers/calendar_parser.py +21 -0
topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
topos/ingestion/parsers/chatgpt_parser.py +67 -0
topos/ingestion/parsers/grok_parser.py +21 -0
topos/ingestion/parsers/messenger_parser.py +97 -0
topos/ingestion/progress.py +54 -0
topos/ingestion/sources/__init__.py +20 -0
topos/ingestion/sources/base.py +39 -0
topos/ingestion/sources/calendar.py +29 -0
topos/ingestion/sources/chatgpt.py +29 -0
topos/ingestion/sources/contact_importers.py +274 -0
topos/ingestion/sources/grok.py +29 -0
topos/ingestion/sources/imessage_reader.py +479 -0
topos/ingestion/sources/signal_export_parser.py +132 -0
topos/ingestion/sources/signal_reader.py +491 -0
topos/ingestion/state_machine.py +70 -0
topos/ingestion/triggers/__init__.py +1 -0
topos/ingestion/triggers/file_trigger.py +36 -0
topos/ingestion/triggers/sqlite_trigger.py +18 -0
topos/ingestion/validation/__init__.py +1 -0
topos/ingestion/validation/base.py +27 -0
topos/ingestion/validation/schema_registry.py +111 -0
topos/ingestion/validation/schema_validator.py +13 -0
topos/lineage/__init__.py +1 -0
topos/lineage/provenance.py +9 -0
topos/lineage/tracker.py +9 -0
topos/mcp_stdio_proxy.py +83 -0
topos/observability/__init__.py +1 -0
topos/observability/alerts.py +7 -0
topos/observability/metrics.py +25 -0
topos/observability/tracing.py +18 -0
topos/openai_client.py +69 -0
topos/projections/__init__.py +1 -0
topos/projections/vector_index/__init__.py +1 -0
topos/projections/vector_index/base.py +21 -0
topos/projections/vector_index/builders.py +11 -0
topos/projections/vector_index/health_checks.py +5 -0
topos/rate_limit.py +43 -0
topos/sanitization/__init__.py +16 -0
topos/sanitization/ollama_transforms.py +276 -0
topos/scope_resolution.py +89 -0
topos/services/__init__.py +1 -0
topos/services/container.py +46 -0
topos/services/embeddings/__init__.py +1 -0
topos/services/embeddings/base.py +7 -0
topos/services/embeddings/local.py +9 -0
topos/services/embeddings/remote.py +9 -0
topos/services/interfaces.py +40 -0
topos/services/llm/__init__.py +1 -0
topos/services/llm/base.py +7 -0
topos/services/llm/openai.py +126 -0
topos/services/local.py +123 -0
topos/services/postgres.py +385 -0
topos/sources/__init__.py +6 -0
topos/sources/definitions.py +114 -0
topos/sources/install_service.py +836 -0
topos/sources/registry.py +263 -0
topos/sources/runtime_install.py +427 -0
topos/storage/__init__.py +1 -0
topos/storage/canonical/__init__.py +18 -0
topos/storage/canonical/ai_chat/__init__.py +22 -0
topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
topos/storage/canonical/ai_chat/mapper.py +168 -0
topos/storage/canonical/ai_chat/model.py +87 -0
topos/storage/canonical/ai_chat/tables.py +179 -0
topos/storage/canonical/canonical_store.py +24 -0
topos/storage/canonical/conversations_tables.py +1020 -0
topos/storage/canonical/mapping_store.py +30 -0
topos/storage/canonical/postgres.py +10 -0
topos/storage/db/__init__.py +1 -0
topos/storage/db/client.py +8 -0
topos/storage/db/migrations/__init__.py +1 -0
topos/storage/db/migrations/stage9_column_renames.py +78 -0
topos/storage/db/paths.py +122 -0
topos/storage/db/postgres.py +240 -0
topos/storage/db/schema.py +6 -0
topos/storage/enrichment/__init__.py +1 -0
topos/storage/enrichment/canonical_enrichment_store.py +7 -0
topos/storage/enrichment/raw_enrichment_store.py +18 -0
topos/storage/normalized/__init__.py +1 -0
topos/storage/normalized/normalized_store.py +24 -0
topos/storage/oplog/__init__.py +1 -0
topos/storage/oplog/decision.py +6 -0
topos/storage/oplog/oplog_store.py +17 -0
topos/storage/oplog/postgres.py +10 -0
topos/storage/projections/__init__.py +1 -0
topos/storage/projections/index_ops_store.py +6 -0
topos/storage/projections/vector_index_store.py +6 -0
topos/storage/raw/__init__.py +1 -0
topos/storage/raw/browser_flat_tables.py +303 -0
topos/storage/raw/file_store.py +100 -0
topos/storage/raw/raw_store.py +29 -0
topos/storage/raw/raw_tables_manager.py +295 -0
topos/storage/raw/sqlite_raw_store.py +17 -0
topos/storage/security/encryption.py +21 -0
topos/storage/signal_identity.py +71 -0
topos/storage/source_settings.py +116 -0
topos/storage/user_identity.py +69 -0
topos/sync/__init__.py +5 -0
topos/sync/client.py +272 -0
topos/sync_handlers.py +70 -0
topos/testing/__init__.py +1 -0
topos/testing/lifespan.py +7 -0
topos/uma_contact_enrichment.py +1032 -0
topos/uma_filters.py +669 -0
topos/uma_resource_id.py +24 -0
topos/uma_rpt.py +69 -0
topos/utils/base_object.py +61 -0
topos/websocket_client.py +21 -0
topos_node-0.1.0.dist-info/METADATA +199 -0
topos_node-0.1.0.dist-info/RECORD +249 -0
topos_node-0.1.0.dist-info/WHEEL +5 -0
topos_node-0.1.0.dist-info/entry_points.txt +2 -0
topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
topos_node-0.1.0.dist-info/top_level.txt +2 -0

topos/ingestion/validation/schema_registry.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""Minimal schema registry for ingestion validation."""
+from __future__ import annotations
+import logging
+from typing import Any, Dict, Optional
+logger = logging.getLogger("topos.ingestion.schema_registry")
def _flat_message_schema(
    name: str,
    version: str,
    description: str,
    file_format: str,
    **extra: Any,
) -> Dict[str, Any]:
    """Build one registry entry for a flat conversation-message record.

    Every built-in schema requires the same five fields with the same
    accepted types; only the descriptive metadata differs.  Each call
    creates fresh list/dict objects so entries never share mutable state.
    Any ``extra`` keyword arguments are appended after the common keys.
    """
    entry: Dict[str, Any] = {
        "name": name,
        "version": version,
        "required_fields": ["id", "thread_id", "role", "content", "created_at"],
        "field_types": {
            "id": str,
            "thread_id": str,
            "role": str,
            "content": str,
            "created_at": (int, float, str),
        },
        "description": description,
        "file_format": file_format,
    }
    entry.update(extra)
    return entry


# Registry of known ingestion schemas, keyed by schema id.
SCHEMAS: Dict[str, Dict[str, Any]] = {
    "chatgpt.conversation.v1": _flat_message_schema(
        name="ChatGPT Conversation Export",
        version="1",
        description="ChatGPT conversation export format (JSONL - flat records)",
        file_format="jsonl",
    ),
    "chatgpt.conversation.v2": _flat_message_schema(
        name="ChatGPT Real Export Format",
        version="2",
        description="ChatGPT real export format (JSON array of conversation objects, flattened to v1 format)",
        file_format="json",
        source_structure="conversation_array",
        note="Flattened records match v1 format, but source is nested conversation objects",
    ),
    # Sprint 02: Messenger ingestion (same logical shape as chat for conversation_messages)
    "imessage.messages.v1": _flat_message_schema(
        name="iMessage Messages",
        version="1",
        description="iMessage message format (normalized from chat.db or sync); id may be imessage:ROWID",
        file_format="jsonl",
    ),
    "signal.messages.v1": _flat_message_schema(
        name="Signal Messages",
        version="1",
        description="Signal Desktop message format (normalized from SQLCipher or export); id may be signal:uuid",
        file_format="jsonl",
    ),
}
def get_schema_definition(schema_id: str) -> Optional[Dict[str, Any]]:
    """Look up a schema definition by id.

    Returns the entry stored in ``SCHEMAS`` under ``schema_id``, or ``None``
    when no schema is registered under that id.
    """
    try:
        return SCHEMAS[schema_id]
    except KeyError:
        return None
def validate_schema(record: Dict[str, Any], schema_id: str) -> tuple[bool, Optional[str]]:
    """Validate ``record`` against the registered schema ``schema_id``.

    Checks, in order, that the schema exists, that every entry of its
    ``required_fields`` is a key of ``record``, and that each field listed in
    ``field_types`` (when present in ``record``) holds a value of an accepted
    type.  A ``field_types`` value may be a single type, a tuple of types, or
    ``None`` to skip the check for that field.

    Returns:
        ``(True, None)`` when the record passes, otherwise ``(False, message)``
        describing the first problem found.
    """
    schema = get_schema_definition(schema_id)
    if not schema:
        return False, f"Unknown schema: {schema_id}"

    for field in schema.get("required_fields", []):
        if field not in record:
            return False, f"Missing required field: {field}"

    for field, expected_type in schema.get("field_types", {}).items():
        if field not in record or expected_type is None:
            continue
        value = record[field]
        is_union = isinstance(expected_type, tuple)
        allowed = expected_type if is_union else (expected_type,)
        # bool is a subclass of int, so a plain isinstance() check would let
        # True/False through wherever an integer is expected.  A bool only
        # matches a type other than int (e.g. bool itself or object).
        is_bool = isinstance(value, bool)
        matches = any(
            isinstance(value, candidate) and not (is_bool and candidate is int)
            for candidate in allowed
        )
        if matches:
            continue
        # Report type names in both branches instead of raw class reprs.
        names = ", ".join(getattr(t, "__name__", repr(t)) for t in allowed)
        expected = f"one of ({names})" if is_union else names
        return False, (
            f"Field '{field}' has invalid type. "
            f"Expected {expected}, got {type(value).__name__}"
        )
    return True, None
def register_schema(schema_id: str, schema_def: Dict[str, Any]) -> None:
    """Store ``schema_def`` in ``SCHEMAS`` under ``schema_id`` and log it.

    An existing entry with the same id is replaced.
    """
    SCHEMAS.update({schema_id: schema_def})
    logger.info("Registered schema: %s", schema_id)

topos/ingestion/validation/schema_validator.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Default schema validator (no-op placeholder)."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from .base import SchemaDefinition, SchemaValidator, ValidationResult
class NoOpSchemaValidator(SchemaValidator):
    """Validator that reports every record as valid without inspecting it."""

    def validate(
        self,
        record: Dict[str, Any],
        schema: Optional[SchemaDefinition] = None,
    ) -> ValidationResult:
        """Return a passing result; ``record`` and ``schema`` are ignored."""
        del record, schema  # intentionally unused
        accepted = ValidationResult(is_valid=True, errors=[], metadata={})
        return accepted

topos/lineage/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Lineage and provenance tracking for Topos."""

topos/lineage/provenance.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Provenance stubs for Topos."""
+from __future__ import annotations
+from typing import Dict
def record_provenance(payload: Dict[str, str]) -> None:
    """Accept a provenance payload and discard it (stub: nothing is stored)."""
    del payload  # placeholder until a provenance sink exists
    return None

topos/lineage/tracker.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Lineage tracking stubs."""
+from __future__ import annotations
+from typing import Dict
def record_lineage(payload: Dict[str, str]) -> None:
    """Accept a lineage payload and discard it (stub: nothing is stored)."""
    del payload  # placeholder until lineage tracking is implemented
    return None

topos/mcp_stdio_proxy.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""
+Stdio MCP proxy: Claude Desktop → local Topos engine (no Control Plane).
+Run this so Claude talks MCP over stdio to the proxy; the proxy forwards tool calls
+to the engine's /api/local/* HTTP endpoints. Use when the engine and Claude run on
+the same machine.
+Usage:
+  ENGINE_URL=http://localhost:9000 BEARER_TOKEN=your_key python -m topos.mcp_stdio_proxy
+  # or with args:
+  python -m topos.mcp_stdio_proxy --url http://localhost:9000
+Claude Desktop config (direct to local engine): use scripts/run_local_mcp_proxy.sh
+with full path, args ["--url", "http://localhost:9000"], and env BEARER_TOKEN.
+Only list_database_tables and get_table_schema are exposed (engine's /api/local/*).
+For full tools (get_analytics, get_messages, get_oplog) use the Control Plane.
+"""
+from __future__ import annotations
+import argparse
+import os
+import sys
+import httpx
+from mcp.server.fastmcp import FastMCP
+# Engine URL and token; set in main() before FastMCP runs.
+_engine_url: str = ""
+_bearer_token: str = ""
def _headers() -> dict[str, str]:
    """Build the HTTP headers for an engine request.

    Reads the module-level ``_bearer_token`` at call time, so the value set
    by ``main()`` is the one that is sent.
    """
    authorization = f"Bearer {_bearer_token}"
    return {"Authorization": authorization, "Content-Type": "application/json"}
async def _call_engine(path: str, json_body: dict | None = None) -> dict:
    """POST ``json_body`` to the engine at ``path`` and return the decoded JSON.

    ``path`` is appended to the module-level ``_engine_url`` (trailing slash
    removed); a missing or empty body is sent as ``{}``.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` on a 4xx/5xx reply.
        ValueError: when the reply is a JSON object whose ``status`` is
            ``"error"``; the message is its ``error`` value or "engine error".
    """
    base = _engine_url.rstrip("/")
    target = f"{base}{path}"
    async with httpx.AsyncClient(timeout=30.0) as session:
        body = json_body or {}
        response = await session.post(target, headers=_headers(), json=body)
    response.raise_for_status()
    payload = response.json()
    engine_reported_error = isinstance(payload, dict) and payload.get("status") == "error"
    if engine_reported_error:
        raise ValueError(payload.get("error", "engine error"))
    return payload
def main() -> int:
    """Parse CLI/env configuration and serve the MCP proxy over stdio.

    The engine URL comes from ``--url`` (default: ``ENGINE_URL`` or
    ``http://localhost:9000``) and the token from the ``BEARER_TOKEN``
    environment variable.  Both are stored in module globals before the
    server starts so the request helpers can read them.

    Returns:
        1 when ``BEARER_TOKEN`` is missing or blank, otherwise 0 once
        ``mcp.run`` returns.
    """
    global _engine_url, _bearer_token

    cli = argparse.ArgumentParser(
        description="MCP stdio proxy to local Topos engine (/api/local/*). No Control Plane."
    )
    cli.add_argument(
        "--url",
        default=os.environ.get("ENGINE_URL", "http://localhost:9000"),
        help="Engine base URL (default: ENGINE_URL or http://localhost:9000)",
    )
    options = cli.parse_args()

    _engine_url = options.url.rstrip("/")
    raw_token = os.environ.get("BEARER_TOKEN") or ""
    _bearer_token = raw_token.strip()
    if not _bearer_token:
        print("Error: BEARER_TOKEN env var required.", file=sys.stderr)
        return 1

    server = FastMCP("Topos (local engine)", port=0)

    # The tool docstrings below are kept verbatim from the original code.
    @server.tool()
    async def list_database_tables() -> dict:
        """List all database tables grouped by layer with row counts."""
        return await _call_engine("/api/local/list_database_tables")

    @server.tool()
    async def get_table_schema(table_name: str) -> dict:
        """Get column info (schema) for a table. Use list_database_tables to see available tables."""
        request_body = {"table_name": table_name}
        return await _call_engine("/api/local/get_table_schema", request_body)

    server.run(transport="stdio")
    return 0
+if __name__ == "__main__":
+    sys.exit(main())

topos/observability/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Observability utilities for Topos."""

topos/observability/alerts.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Alerting stubs for Topos."""
+from __future__ import annotations
def send_alert(message: str) -> None:
    """Accept an alert message and discard it (stub: nothing is sent)."""
    del message  # placeholder until an alert channel is wired up
    return None

topos/observability/metrics.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Observability metrics (Sprint 07). In-memory counters; extend for Prometheus later."""
+from __future__ import annotations
+import threading
+from typing import Dict
# Running totals keyed by metric name; every access below holds _lock.
_counts: Dict[str, float] = {}
_lock = threading.Lock()


def record_metric(name: str, value: float) -> None:
    """Add ``value`` to the running total for ``name`` (a new name starts at 0)."""
    with _lock:
        previous = _counts.get(name, 0)
        _counts[name] = previous + value


def get_metric(name: str) -> float:
    """Return the current total for ``name``, or ``0.0`` if it was never recorded."""
    with _lock:
        total = _counts.get(name, 0.0)
    return total


def reset_metrics() -> None:
    """Drop every recorded total."""
    with _lock:
        _counts.clear()

topos/observability/tracing.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""Tracing stubs for Topos."""
+from __future__ import annotations
class Span:
    """Placeholder tracing span: a named object usable as a context manager."""

    def __init__(self, name: str):
        self.name = name

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Returning None (falsy) lets any exception from the with-body propagate.
        del exc_type, exc, tb
        return None
def start_span(name: str) -> Span:
    """Create a new ``Span`` labelled ``name``."""
    span = Span(name)
    return span

topos/openai_client.py ADDED Viewed

@@ -0,0 +1,69 @@
+from __future__ import annotations
+import logging
+from typing import Any, Dict, Optional
+import httpx
+from .config.settings import settings
+logger = logging.getLogger(__name__)
class OpenAIError(Exception):
    """Error raised when an upstream OpenAI request fails or its reply is unusable."""
class OpenAIClient:
    """Minimal OpenAI chat completions client."""

    def __init__(self, api_key: str | None = None, base_url: str | None = None) -> None:
        """Store credentials and endpoint, falling back to ``settings`` values.

        Args:
            api_key: API key; ``settings.openai_api_key`` when omitted or empty.
            base_url: API base URL; ``settings.openai_base_url`` when omitted or empty.
        """
        self.api_key = api_key or settings.openai_api_key
        self.base_url = base_url or settings.openai_base_url
        self.timeout = settings.openai_timeout_seconds

    async def generate(
        self,
        prompt: str,
        max_tokens: Optional[int],
        temperature: Optional[float],
    ) -> Dict[str, Any]:
        """Send ``prompt`` as a single user message and return the completion.

        Args:
            prompt: Text of the user message.
            max_tokens: Sent as ``max_tokens`` when not ``None``.
            temperature: Sent as ``temperature`` when not ``None``.

        Returns:
            ``{"output": <content of the first choice>, "usage": <usage object or {}>}``.

        Raises:
            OpenAIError: ``"request failed: ..."`` when the HTTP call itself
                fails, ``"rate_limited"`` on HTTP 429,
                ``"upstream_error: <status>: <detail>"`` on any other status
                >= 400, and ``"invalid_response"`` when the success body is
                not JSON or lacks the expected structure.
        """
        url = f"{self.base_url.rstrip('/')}/chat/completions"
        headers = {"Authorization": f"Bearer {self.api_key}"}
        payload: Dict[str, Any] = {
            "model": settings.openai_model,
            "messages": [
                {"role": "system", "content": "You are a concise assistant."},
                {"role": "user", "content": prompt},
            ],
        }
        if max_tokens is not None:
            payload["max_tokens"] = max_tokens
        if temperature is not None:
            payload["temperature"] = temperature

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                resp = await client.post(url, headers=headers, json=payload)
        except Exception as exc:  # noqa: BLE001
            logger.error("OpenAI request failed: %s", exc)
            raise OpenAIError(f"request failed: {exc}") from exc

        if resp.status_code == 429:
            raise OpenAIError("rate_limited")
        if resp.status_code >= 400:
            # Prefer the structured error body; fall back to raw text.
            try:
                detail = resp.json()
            except Exception:  # noqa: BLE001
                detail = resp.text
            raise OpenAIError(f"upstream_error: {resp.status_code}: {detail}")

        # Decoding happens inside the try so that a non-JSON success body is
        # reported as OpenAIError("invalid_response") like any other malformed
        # reply, instead of leaking a raw decode error to callers.
        try:
            data = resp.json()
            message = data["choices"][0]["message"]["content"]
            usage = data.get("usage", {})
        except Exception as exc:  # noqa: BLE001
            raise OpenAIError("invalid_response") from exc
        return {"output": message, "usage": usage}

topos/projections/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Projection builders for Topos."""

topos/projections/vector_index/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Vector index projection abstractions."""

topos/projections/vector_index/base.py ADDED Viewed

@@ -0,0 +1,21 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Iterable, List
@dataclass(frozen=True)
class EmbeddingRow:
    """One embedding entry: a record id paired with a list of floats.

    The dataclass is frozen, so attributes cannot be rebound after
    construction (the list object itself is still mutable).
    """

    # Identifier of the record this entry belongs to.
    record_id: str
    # Float values associated with that record.
    vector: List[float]
@dataclass(frozen=True)
class ProjectionStatus:
    """Immutable result of a projection build: a status label and a count."""

    # Status label reported by the builder.
    status: str
    # Count reported by the builder.
    count: int
class ProjectionBuilder:
    """Base class for projection builders; subclasses must override ``build``."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Build a projection from ``embeddings``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

topos/projections/vector_index/builders.py ADDED Viewed

@@ -0,0 +1,11 @@
+from __future__ import annotations
+from typing import Iterable
+from .base import EmbeddingRow, ProjectionBuilder, ProjectionStatus
class VectorIndexBuilder(ProjectionBuilder):
    """Stub builder that only counts the embeddings it is given."""

    def build(self, embeddings: Iterable[EmbeddingRow]) -> ProjectionStatus:
        """Consume ``embeddings`` and report how many there were, with status "stub"."""
        total = 0
        for _row in embeddings:
            total += 1
        return ProjectionStatus(status="stub", count=total)

topos/projections/vector_index/health_checks.py ADDED Viewed

@@ -0,0 +1,5 @@
+from __future__ import annotations
def check_index_health() -> dict:
    """Return a fixed placeholder health report (``status`` is always "stub")."""
    report = dict(status="stub")
    return report

topos/rate_limit.py ADDED Viewed

@@ -0,0 +1,43 @@
+from __future__ import annotations
+import time
+from typing import Dict
+from fastapi import HTTPException, Request, status
+from .config.settings import settings
class TokenBucket:
    """Token-bucket limiter that refills continuously at a per-minute rate.

    The bucket starts full with ``rate_per_minute`` tokens, which is also
    its capacity.
    """

    def __init__(self, rate_per_minute: int) -> None:
        """Create a full bucket holding ``rate_per_minute`` tokens."""
        self.capacity = rate_per_minute
        self.tokens = rate_per_minute
        # Monotonic clock: wall-clock adjustments (NTP steps, manual changes)
        # must neither stall refills nor grant a sudden burst of tokens.
        self.refill_time = time.monotonic()
        self.rate_per_second = rate_per_minute / 60.0

    def consume(self, tokens: int = 1) -> bool:
        """Try to take ``tokens`` from the bucket.

        Tokens earned since the last refill are credited first, capped at
        ``capacity``.

        Returns:
            ``True`` and deducts the tokens when enough are available,
            otherwise ``False`` and leaves the balance untouched.
        """
        now = time.monotonic()
        elapsed = now - self.refill_time
        refill = elapsed * self.rate_per_second
        if refill > 0:
            self.tokens = min(self.capacity, self.tokens + refill)
            self.refill_time = now
        if self.tokens >= tokens:
            self.tokens -= tokens
            return True
        return False
# One bucket per client IP, created on first request and kept for the
# lifetime of the process.
_buckets: Dict[str, TokenBucket] = {}


def rate_limit(request: Request) -> None:
    """Simple in-memory rate limit per client IP.

    Requests without client information share the key "unknown".

    Raises:
        HTTPException: with status 429 when the caller's bucket is empty.
    """
    peer = request.client
    ip = peer.host if peer else "unknown"
    try:
        bucket = _buckets[ip]
    except KeyError:
        bucket = _buckets[ip] = TokenBucket(settings.rate_limit_per_minute)
    if bucket.consume():
        return
    raise HTTPException(status_code=status.HTTP_429_TOO_MANY_REQUESTS, detail="Too many requests")

topos/sanitization/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Runtime text sanitization helpers (optional Ollama-backed field transforms)."""
+from topos.config.sanitization_ollama import SANITIZATION_OLLAMA_TRANSFORM_IDS
+from .ollama_transforms import (
+    OLLAMA_TRANSFORM_IDS,
+    apply_text_transform_with_ollama,
+    ollama_sanitization_enabled,
+)
+__all__ = [
+    "SANITIZATION_OLLAMA_TRANSFORM_IDS",
+    "OLLAMA_TRANSFORM_IDS",
+    "apply_text_transform_with_ollama",
+    "ollama_sanitization_enabled",
+]