PyPI - topos-node - Versions diffs - 0.1.0__py3-none-any.whl - Mend

topos-node 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

shared/__init__.py +59 -0
shared/filtering.py +640 -0
shared/schema_registry.py +229 -0
topos/__init__.py +5 -0
topos/__version__.py +6 -0
topos/analytics/__init__.py +15 -0
topos/analytics/duckdb_adapter.py +48 -0
topos/analytics/messenger_communities.py +349 -0
topos/analytics/messenger_graph.py +522 -0
topos/analytics/messenger_labels.py +321 -0
topos/analytics/profiles.py +22 -0
topos/analytics/query_engine.py +64 -0
topos/analytics/raw_queries.py +174 -0
topos/api/__init__.py +1 -0
topos/api/analytics.py +52 -0
topos/api/app_registry.py +31 -0
topos/api/backup.py +15 -0
topos/api/compute_remote.py +175 -0
topos/api/data_commit.py +158 -0
topos/api/data_explorer_table_prefs.py +81 -0
topos/api/db.py +10 -0
topos/api/device.py +25 -0
topos/api/enrichment.py +959 -0
topos/api/filter_lab.py +195 -0
topos/api/health.py +61 -0
topos/api/ingestion_api.py +37 -0
topos/api/ingestion_compat.py +21 -0
topos/api/ingestion_sources.py +600 -0
topos/api/llm.py +76 -0
topos/api/local_mcp.py +46 -0
topos/api/messenger_analytics.py +385 -0
topos/api/query_api.py +13 -0
topos/api/sanitization_ollama_config.py +64 -0
topos/api/source_install.py +324 -0
topos/api/sources.py +13 -0
topos/api/sync.py +10 -0
topos/api/ui_config.py +83 -0
topos/api/uma_data.py +311 -0
topos/api/usage.py +49 -0
topos/api/user_identity.py +46 -0
topos/app.py +239 -0
topos/auth.py +17 -0
topos/canonicalization/__init__.py +1 -0
topos/canonicalization/mappers/__init__.py +22 -0
topos/canonicalization/mappers/base.py +26 -0
topos/canonicalization/mappers/chatgpt_mapper.py +40 -0
topos/canonicalization/mappers/grok_mapper.py +17 -0
topos/canonicalization/mappers/messenger_mapper.py +58 -0
topos/canonicalization/models.py +31 -0
topos/canonicalization/resolver.py +23 -0
topos/cli/__init__.py +1 -0
topos/cli/__main__.py +6 -0
topos/cli/commands.py +132 -0
topos/config/__init__.py +1 -0
topos/config/sanitization_ollama.py +189 -0
topos/config/settings.py +310 -0
topos/contacts/__init__.py +5 -0
topos/contacts/identity.py +24 -0
topos/control_plane_client.py +300 -0
topos/core/__init__.py +1 -0
topos/core/api_models.py +128 -0
topos/core/connection_resilience.py +99 -0
topos/core/device_helpers.py +8 -0
topos/core/errors.py +13 -0
topos/core/events.py +12 -0
topos/core/handlers.py +5625 -0
topos/core/logging.py +175 -0
topos/core/metrics.py +21 -0
topos/core/startup_banner.py +62 -0
topos/core/state.py +682 -0
topos/core/table_layers.py +45 -0
topos/core/types.py +13 -0
topos/data_explorer_table_prefs.py +150 -0
topos/engine/__init__.py +29 -0
topos/engine/backends/__init__.py +50 -0
topos/engine/backends/base.py +21 -0
topos/engine/backends/huggingface.py +151 -0
topos/engine/backends/ollama.py +181 -0
topos/engine/backends/stub.py +22 -0
topos/engine/engine.py +165 -0
topos/engine/intake.py +32 -0
topos/engine/queue_manager.py +112 -0
topos/engine/registration.py +126 -0
topos/engine/result_formatter.py +38 -0
topos/engine/router.py +19 -0
topos/engine/scoped_token.py +82 -0
topos/engine/tasks.py +154 -0
topos/engine/transport.py +44 -0
topos/engine/usage_guard.py +100 -0
topos/engine/usage_observation.py +129 -0
topos/engine/validator.py +23 -0
topos/enrichment/__init__.py +1 -0
topos/enrichment/derived_tables.py +214 -0
topos/enrichment/jobs/__init__.py +30 -0
topos/enrichment/jobs/base.py +54 -0
topos/enrichment/jobs/canonical/__init__.py +1 -0
topos/enrichment/jobs/canonical/embeddings_job.py +27 -0
topos/enrichment/jobs/canonical/emo_27_job.py +97 -0
topos/enrichment/jobs/canonical/entities_job.py +27 -0
topos/enrichment/jobs/canonical/sentiment_job.py +27 -0
topos/enrichment/jobs/canonical/topics_job.py +27 -0
topos/enrichment/jobs/raw/__init__.py +1 -0
topos/enrichment/jobs/raw/attachments_job.py +12 -0
topos/enrichment/jobs/raw/language_job.py +12 -0
topos/enrichment/jobs/raw/time_normalization_job.py +12 -0
topos/enrichment/jobs/raw/tool_calls_job.py +12 -0
topos/enrichment/models/__init__.py +1 -0
topos/enrichment/models/manager.py +8 -0
topos/enrichment/models/registry.py +71 -0
topos/enrichment/models/versioning.py +8 -0
topos/enrichment/orchestrator.py +177 -0
topos/enrichment/processor.py +17 -0
topos/enrichment/progress_bar.py +122 -0
topos/enrichment/website_classifier.py +31 -0
topos/filter_lab/__init__.py +1 -0
topos/filter_lab/bundles.py +300 -0
topos/filter_lab/schema.py +86 -0
topos/filter_lab/service.py +167 -0
topos/filter_lab/store.py +374 -0
topos/filter_lab/worker.py +250 -0
topos/hosted_pool_lease.py +153 -0
topos/ingestion/__init__.py +1 -0
topos/ingestion/checkpoints/__init__.py +6 -0
topos/ingestion/checkpoints/checkpoint_store.py +24 -0
topos/ingestion/checkpoints/sqlite_checkpoint_store.py +82 -0
topos/ingestion/ingest_helpers.py +504 -0
topos/ingestion/jobs.py +91 -0
topos/ingestion/local_sync.py +823 -0
topos/ingestion/log_preview.py +21 -0
topos/ingestion/manager.py +1100 -0
topos/ingestion/parser.py +174 -0
topos/ingestion/parsers/__init__.py +32 -0
topos/ingestion/parsers/base.py +24 -0
topos/ingestion/parsers/browser_parser.py +171 -0
topos/ingestion/parsers/calendar_parser.py +21 -0
topos/ingestion/parsers/chatgpt_conversation_flattener.py +266 -0
topos/ingestion/parsers/chatgpt_parser.py +67 -0
topos/ingestion/parsers/grok_parser.py +21 -0
topos/ingestion/parsers/messenger_parser.py +97 -0
topos/ingestion/progress.py +54 -0
topos/ingestion/sources/__init__.py +20 -0
topos/ingestion/sources/base.py +39 -0
topos/ingestion/sources/calendar.py +29 -0
topos/ingestion/sources/chatgpt.py +29 -0
topos/ingestion/sources/contact_importers.py +274 -0
topos/ingestion/sources/grok.py +29 -0
topos/ingestion/sources/imessage_reader.py +479 -0
topos/ingestion/sources/signal_export_parser.py +132 -0
topos/ingestion/sources/signal_reader.py +491 -0
topos/ingestion/state_machine.py +70 -0
topos/ingestion/triggers/__init__.py +1 -0
topos/ingestion/triggers/file_trigger.py +36 -0
topos/ingestion/triggers/sqlite_trigger.py +18 -0
topos/ingestion/validation/__init__.py +1 -0
topos/ingestion/validation/base.py +27 -0
topos/ingestion/validation/schema_registry.py +111 -0
topos/ingestion/validation/schema_validator.py +13 -0
topos/lineage/__init__.py +1 -0
topos/lineage/provenance.py +9 -0
topos/lineage/tracker.py +9 -0
topos/mcp_stdio_proxy.py +83 -0
topos/observability/__init__.py +1 -0
topos/observability/alerts.py +7 -0
topos/observability/metrics.py +25 -0
topos/observability/tracing.py +18 -0
topos/openai_client.py +69 -0
topos/projections/__init__.py +1 -0
topos/projections/vector_index/__init__.py +1 -0
topos/projections/vector_index/base.py +21 -0
topos/projections/vector_index/builders.py +11 -0
topos/projections/vector_index/health_checks.py +5 -0
topos/rate_limit.py +43 -0
topos/sanitization/__init__.py +16 -0
topos/sanitization/ollama_transforms.py +276 -0
topos/scope_resolution.py +89 -0
topos/services/__init__.py +1 -0
topos/services/container.py +46 -0
topos/services/embeddings/__init__.py +1 -0
topos/services/embeddings/base.py +7 -0
topos/services/embeddings/local.py +9 -0
topos/services/embeddings/remote.py +9 -0
topos/services/interfaces.py +40 -0
topos/services/llm/__init__.py +1 -0
topos/services/llm/base.py +7 -0
topos/services/llm/openai.py +126 -0
topos/services/local.py +123 -0
topos/services/postgres.py +385 -0
topos/sources/__init__.py +6 -0
topos/sources/definitions.py +114 -0
topos/sources/install_service.py +836 -0
topos/sources/registry.py +263 -0
topos/sources/runtime_install.py +427 -0
topos/storage/__init__.py +1 -0
topos/storage/canonical/__init__.py +18 -0
topos/storage/canonical/ai_chat/__init__.py +22 -0
topos/storage/canonical/ai_chat/canonicalizer.py +147 -0
topos/storage/canonical/ai_chat/mapper.py +168 -0
topos/storage/canonical/ai_chat/model.py +87 -0
topos/storage/canonical/ai_chat/tables.py +179 -0
topos/storage/canonical/canonical_store.py +24 -0
topos/storage/canonical/conversations_tables.py +1020 -0
topos/storage/canonical/mapping_store.py +30 -0
topos/storage/canonical/postgres.py +10 -0
topos/storage/db/__init__.py +1 -0
topos/storage/db/client.py +8 -0
topos/storage/db/migrations/__init__.py +1 -0
topos/storage/db/migrations/stage9_column_renames.py +78 -0
topos/storage/db/paths.py +122 -0
topos/storage/db/postgres.py +240 -0
topos/storage/db/schema.py +6 -0
topos/storage/enrichment/__init__.py +1 -0
topos/storage/enrichment/canonical_enrichment_store.py +7 -0
topos/storage/enrichment/raw_enrichment_store.py +18 -0
topos/storage/normalized/__init__.py +1 -0
topos/storage/normalized/normalized_store.py +24 -0
topos/storage/oplog/__init__.py +1 -0
topos/storage/oplog/decision.py +6 -0
topos/storage/oplog/oplog_store.py +17 -0
topos/storage/oplog/postgres.py +10 -0
topos/storage/projections/__init__.py +1 -0
topos/storage/projections/index_ops_store.py +6 -0
topos/storage/projections/vector_index_store.py +6 -0
topos/storage/raw/__init__.py +1 -0
topos/storage/raw/browser_flat_tables.py +303 -0
topos/storage/raw/file_store.py +100 -0
topos/storage/raw/raw_store.py +29 -0
topos/storage/raw/raw_tables_manager.py +295 -0
topos/storage/raw/sqlite_raw_store.py +17 -0
topos/storage/security/encryption.py +21 -0
topos/storage/signal_identity.py +71 -0
topos/storage/source_settings.py +116 -0
topos/storage/user_identity.py +69 -0
topos/sync/__init__.py +5 -0
topos/sync/client.py +272 -0
topos/sync_handlers.py +70 -0
topos/testing/__init__.py +1 -0
topos/testing/lifespan.py +7 -0
topos/uma_contact_enrichment.py +1032 -0
topos/uma_filters.py +669 -0
topos/uma_resource_id.py +24 -0
topos/uma_rpt.py +69 -0
topos/utils/base_object.py +61 -0
topos/websocket_client.py +21 -0
topos_node-0.1.0.dist-info/METADATA +199 -0
topos_node-0.1.0.dist-info/RECORD +249 -0
topos_node-0.1.0.dist-info/WHEEL +5 -0
topos_node-0.1.0.dist-info/entry_points.txt +2 -0
topos_node-0.1.0.dist-info/licenses/LICENSE +201 -0
topos_node-0.1.0.dist-info/top_level.txt +2 -0

topos/core/table_layers.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""Coarse table layer labels for list_database_tables (system / raw / enrichment / canonical)."""
+from __future__ import annotations
+from typing import Dict, Tuple
+# Coarse layer identifier. This is a plain ``str`` alias, so the allowed values
+# listed on the line below are a convention and are not enforced by the type.
+TableLayerKind = str  # "system" | "raw" | "enrichment" | "canonical"
+# Display label for each coarse layer kind.
+_LAYER_LABELS: Dict[TableLayerKind, str] = {
+    "system": "Topos system",
+    "raw": "Raw",
+    "enrichment": "Enrichment",
+    "canonical": "Canonical",
+}
+# Maps engine list_database_tables category keys to coarse layer kinds.
+# Several category keys intentionally collapse onto the same layer kind.
+_CATEGORY_TO_LAYER: Dict[str, TableLayerKind] = {
+    "system": "system",
+    "enrichment_system": "system",
+    "raw_retention": "raw",
+    "raw_enrichment": "enrichment",
+    "browser_flat": "raw",
+    "source": "raw",
+    "canonical": "canonical",
+    "canonical_enrichment": "enrichment",
+    "other": "raw",
+}
+def layer_kind_for_category(category_key: str) -> TableLayerKind:
+    key = (category_key or "").strip()
+    return _CATEGORY_TO_LAYER.get(key, "raw")
+def layer_label_for_kind(layer_kind: str) -> str:
+    return _LAYER_LABELS.get((layer_kind or "").strip(), "Raw")
+def layer_for_category(category_key: str) -> Tuple[TableLayerKind, str]:
+    kind = layer_kind_for_category(category_key)
+    return kind, layer_label_for_kind(kind)
+def layer_kind_labels() -> Dict[TableLayerKind, str]:
+    return dict(_LAYER_LABELS)

topos/core/types.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Shared types for Topos."""
+from __future__ import annotations
+from typing import Any, Dict, TypedDict
+# Alias for a dictionary with string keys and arbitrary values.
+JsonDict = Dict[str, Any]
+class HealthStatus(TypedDict):
+    """Typed dictionary shape for a health-status payload."""
+    status: str
+    time: float  # NOTE(review): presumably a Unix timestamp - confirm against the producer
+    cloud_connected: bool | None  # None is permitted; presumably means "unknown" - TODO confirm

topos/data_explorer_table_prefs.py ADDED Viewed

@@ -0,0 +1,150 @@
+from __future__ import annotations
+import json
+import logging
+import sqlite3
+from typing import Any, Dict, Optional
+from topos.core.state import get_engine_config_value, set_engine_config_value
+logger = logging.getLogger("topos.data_explorer_table_prefs")
+# Version number written into every normalized prefs envelope.
+PREFS_VERSION = 1
+# Upper bounds (in characters, after stripping) accepted for table names and user ids.
+MAX_TABLE_NAME_LEN = 256
+MAX_USER_ID_LEN = 256
+# Maximum size of the compact-JSON, UTF-8 encoded prefs envelope (32 KiB).
+MAX_PREFS_BYTES = 32 * 1024
+# Inclusive range that stored column widths are clamped to.
+MIN_COLUMN_WIDTH = 80
+MAX_COLUMN_WIDTH = 480
+def build_table_prefs_config_key(user_id: str, table_name: str) -> str:
+    uid = str(user_id or "").strip()
+    name = str(table_name or "").strip()
+    return f"data_explorer_table_prefs:v1:{uid}:{name}"
+def _clamp_column_width(width: Any, fallback: int = 160) -> int:
+    try:
+        n = float(width)
+    except (TypeError, ValueError):
+        n = float(fallback)
+    if n != n or n < 0:
+        n = float(fallback)
+    return int(min(MAX_COLUMN_WIDTH, max(MIN_COLUMN_WIDTH, n)))
+def normalize_table_prefs_payload(raw: Any) -> Dict[str, Any]:
+    if not isinstance(raw, dict):
+        raise ValueError("INVALID_PREFS")
+    column_widths_raw = raw.get("columnWidths")
+    if not isinstance(column_widths_raw, dict):
+        raise ValueError("INVALID_PREFS")
+    column_widths: Dict[str, int] = {}
+    for key, width in column_widths_raw.items():
+        col = str(key).strip()
+        if not col:
+            continue
+        column_widths[col] = _clamp_column_width(width)
+    prefs: Dict[str, Any] = {"columnWidths": column_widths}
+    sort = raw.get("sort")
+    if sort is not None:
+        if not isinstance(sort, dict):
+            raise ValueError("INVALID_SORT")
+        column_id = str(sort.get("columnId") or "").strip()
+        direction = str(sort.get("direction") or "").strip().lower()
+        if not column_id or direction not in {"asc", "desc"}:
+            raise ValueError("INVALID_SORT")
+        prefs["sort"] = {"columnId": column_id, "direction": direction}
+    for field in ("hiddenColumns", "columnOrder"):
+        values = raw.get(field)
+        if values is None:
+            continue
+        if not isinstance(values, list):
+            raise ValueError("INVALID_PREFS")
+        cleaned = [str(v).strip() for v in values if str(v).strip()]
+        if cleaned:
+            prefs[field] = cleaned
+    envelope = {"version": PREFS_VERSION, **prefs}
+    serialized = json.dumps(envelope, separators=(",", ":"), default=str)
+    if len(serialized.encode("utf-8")) > MAX_PREFS_BYTES:
+        raise ValueError("PREFS_TOO_LARGE")
+    return envelope
+def _validate_table_name(table_name: str) -> str:
+    name = str(table_name or "").strip()
+    if not name or len(name) > MAX_TABLE_NAME_LEN:
+        raise ValueError("INVALID_TABLE_NAME")
+    return name
+def _validate_user_id(user_id: str) -> str:
+    uid = str(user_id or "").strip()
+    if not uid or len(uid) > MAX_USER_ID_LEN:
+        raise ValueError("INVALID_USER_ID")
+    return uid
+def get_table_prefs(
+    conn: sqlite3.Connection,
+    *,
+    user_id: str,
+    table_name: str,
+) -> Optional[Dict[str, Any]]:
+    uid = _validate_user_id(user_id)
+    name = _validate_table_name(table_name)
+    raw = get_engine_config_value(conn, build_table_prefs_config_key(uid, name))
+    if not raw:
+        return None
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError:
+        return None
+    if not isinstance(parsed, dict):
+        return None
+    try:
+        return normalize_table_prefs_payload(parsed)
+    except ValueError:
+        return None
+def put_table_prefs(
+    conn: sqlite3.Connection,
+    *,
+    user_id: str,
+    table_name: str,
+    prefs: Dict[str, Any],
+) -> Dict[str, Any]:
+    uid = _validate_user_id(user_id)
+    name = _validate_table_name(table_name)
+    normalized = normalize_table_prefs_payload(prefs)
+    set_engine_config_value(conn, build_table_prefs_config_key(uid, name), json.dumps(normalized))
+    logger.info(
+        "data_explorer_table_prefs_saved user_id=%s table_name=%s columns=%d",
+        uid[:8],
+        name,
+        len(normalized.get("columnWidths") or {}),
+    )
+    return normalized
+def delete_table_prefs(
+    conn: sqlite3.Connection,
+    *,
+    user_id: str,
+    table_name: str,
+) -> bool:
+    uid = _validate_user_id(user_id)
+    name = _validate_table_name(table_name)
+    key = build_table_prefs_config_key(uid, name)
+    existing = get_engine_config_value(conn, key)
+    if not existing:
+        return False
+    conn.execute("DELETE FROM engine_config WHERE key = ?", (key,))
+    conn.commit()
+    logger.info(
+        "data_explorer_table_prefs_deleted user_id=%s table_name=%s",
+        uid[:8],
+        name,
+    )
+    return True

topos/engine/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""Topos Engine: unified runtime for ML/LLM processing (enrichments, transformations, queries)."""
+from .engine import Engine
+from .tasks import (
+    ExecutionMeta,
+    ExecutionSpec,
+    ModelRequest,
+    ProcessingResult,
+    ProcessingTask,
+    Provenance,
+    RequestedBy,
+    TaskOptions,
+    build_task,
+    build_url_classification_task,
+)
+# Names re-exported as the public API of this package: the Engine class plus the
+# task types and builder functions imported above from .engine and .tasks.
+__all__ = [
+    "Engine",
+    "ProcessingTask",
+    "ProcessingResult",
+    "ModelRequest",
+    "ExecutionSpec",
+    "TaskOptions",
+    "RequestedBy",
+    "Provenance",
+    "ExecutionMeta",
+    "build_task",
+    "build_url_classification_task",
+]

topos/engine/backends/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""Backend adapters for model inference."""
+from __future__ import annotations
+import threading
+from typing import Any, Dict, Optional
+from .base import BackendAdapter
+from .huggingface import HuggingFaceAdapter
+from .ollama import OllamaAdapter
+from .stub import StubBackendAdapter, get_stub_adapter
+# Lazily created module-level adapter instances; they stay None until the matching
+# get_huggingface_adapter() / get_ollama_adapter() call creates them.
+_huggingface_singleton: HuggingFaceAdapter | None = None
+_ollama_singleton: OllamaAdapter | None = None
+# One lock per singleton, held while the corresponding adapter is being created.
+_huggingface_lock = threading.Lock()
+_ollama_lock = threading.Lock()
+# Public API of the backends package.
+__all__ = [
+    "BackendAdapter",
+    "HuggingFaceAdapter",
+    "OllamaAdapter",
+    "StubBackendAdapter",
+    "get_stub_adapter",
+    "get_huggingface_adapter",
+    "get_ollama_adapter",
+]
+def get_huggingface_adapter() -> HuggingFaceAdapter:
+    """Return the shared HuggingFace adapter (loads models on first use, cached afterward)."""
+    global _huggingface_singleton
+    if _huggingface_singleton is not None:
+        return _huggingface_singleton
+    with _huggingface_lock:
+        if _huggingface_singleton is None:
+            _huggingface_singleton = HuggingFaceAdapter()
+        return _huggingface_singleton
+def get_ollama_adapter() -> OllamaAdapter:
+    """Return the shared Ollama adapter (uses ENGINE_OLLAMA_BASE_URL from config)."""
+    global _ollama_singleton
+    if _ollama_singleton is not None:
+        return _ollama_singleton
+    with _ollama_lock:
+        if _ollama_singleton is None:
+            _ollama_singleton = OllamaAdapter()
+        return _ollama_singleton

topos/engine/backends/base.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Backend adapter protocol for model inference (PRD §8.2)."""
+from __future__ import annotations
+from typing import Any, Dict, Optional, Protocol
+class BackendAdapter(Protocol):
+    """Protocol for model backends (Ollama, HuggingFace)."""
+    def load_model(self, model_name: str, config: Optional[Dict[str, Any]] = None) -> None:
+        """Load or ensure model is loaded. Optional cache."""
+        ...
+    def run_inference(self, payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Run inference; payload is task input; config may include subtype, model, etc. Returns output dict."""
+        ...
+    def unload_model(self, model_name: str) -> None:
+        """Unload model to free memory."""
+        ...

topos/engine/backends/huggingface.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""HuggingFace backend adapter: url_classification and emotion_classification."""
+from __future__ import annotations
+import logging
+import threading
+from typing import Any, Dict, Optional
+logger = logging.getLogger("topos.engine.huggingface")
+# Default models (same as current website_classifier and emo_27_job).
+# Used whenever the inference config does not name a model.
+DEFAULT_URL_CLASSIFICATION_MODEL = "KnutJaegersberg/website-classifier"
+DEFAULT_EMOTION_MODEL = "SamLowe/roberta-base-go_emotions"
+class HuggingFaceAdapter:
+    """BackendAdapter for HuggingFace: text-classification pipeline and go_emotions model."""
+    def __init__(self) -> None:
+        self._url_pipeline: Any = None
+        self._url_lock = threading.Lock()
+        self._emotion_model: Any = None
+        self._emotion_tokenizer: Any = None
+        self._emotion_loaded = False
+        self._emotion_lock = threading.Lock()
+    def load_model(self, model_name: str, config: Optional[Dict[str, Any]] = None) -> None:
+        """Load model by name; we load on first run_inference per subtype instead."""
+        pass
+    def ensure_model(self, model_name: str, subtype: Optional[str] = None) -> bool:
+        """
+        Ensure the model is downloaded (e.g. from HuggingFace Hub). Downloads if not present.
+        Returns True if a download was triggered (caller may clean up cache later), False if already in cache.
+        Logs when download starts; Hub may show progress via tqdm if enabled.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+        except ImportError:
+            return False
+        logger.info("Downloading model %s (huggingface)...", model_name)
+        try:
+            # tqdm_enabled=True lets HuggingFace show a progress bar when available
+            snapshot_download(repo_id=model_name, tqdm_enabled=True)
+        except Exception:
+            logger.exception("Failed to download model %s", model_name)
+            return False
+        logger.info("Model %s (huggingface) download complete.", model_name)
+        return True
+    def _get_url_pipeline(self, model_name: str):
+        with self._url_lock:
+            if self._url_pipeline is not None:
+                return self._url_pipeline
+            from transformers import pipeline
+            self._url_pipeline = pipeline(
+                task="text-classification",
+                model=model_name,
+            )
+            return self._url_pipeline
+    def _get_emotion_model(self, model_name: str):
+        with self._emotion_lock:
+            if self._emotion_loaded:
+                return self._emotion_model, self._emotion_tokenizer
+            from transformers import AutoModelForSequenceClassification, AutoTokenizer
+            import torch
+            self._emotion_tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self._emotion_model = AutoModelForSequenceClassification.from_pretrained(model_name)
+            self._emotion_model.eval()
+            self._emotion_loaded = True
+            return self._emotion_model, self._emotion_tokenizer
+    def run_inference(self, payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Dispatch by config.subtype to url_classification or emotion_classification."""
+        config = config or {}
+        subtype = config.get("subtype") or ""
+        model = config.get("model") or ""
+        if subtype == "url_classification":
+            return self._run_url_classification(payload, model)
+        if subtype in ("emotion_classification", "emo_27"):
+            return self._run_emotion_classification(payload, model)
+        # Unknown subtype: return error-like output so formatter can set status
+        return {"error": f"Unknown subtype: {subtype}", "status": "unsupported"}
+    def _run_url_classification(self, payload: Dict[str, Any], model_name: str) -> Dict[str, Any]:
+        """Same behavior as WebsiteUrlClassifier."""
+        url = payload.get("url") or ""
+        title = payload.get("title") or ""
+        if not isinstance(url, str) or not url.strip():
+            return {"error": "url must be a non-empty string", "category": "unknown", "confidence": 0.0, "model": model_name or DEFAULT_URL_CLASSIFICATION_MODEL}
+        model = model_name or DEFAULT_URL_CLASSIFICATION_MODEL
+        pipeline = self._get_url_pipeline(model)
+        clean_url = url.strip()
+        clean_title = (title or "").strip()
+        text = f"{clean_url} [SEP] {clean_title}" if clean_title else clean_url
+        result = pipeline(text, truncation=True, top_k=1)
+        top_result = result[0] if isinstance(result, list) and result else {}
+        return {
+            "category": top_result.get("label", "unknown"),
+            "confidence": float(top_result.get("score", 0.0) or 0.0),
+            "model": model,
+        }
+    def _run_emotion_classification(self, payload: Dict[str, Any], model_name: str) -> Dict[str, Any]:
+        """Same behavior as Emo27Job._classify_emotion."""
+        text = payload.get("text") or payload.get("content") or ""
+        if not text or not isinstance(text, str):
+            return {"error": "text or content required", "emotion_label": None, "confidence": None, "all_emotions": [], "model": model_name or DEFAULT_EMOTION_MODEL}
+        model = model_name or DEFAULT_EMOTION_MODEL
+        import torch
+        emo_model, tokenizer = self._get_emotion_model(model)
+        inputs = tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            max_length=512,
+            padding=True,
+        )
+        with torch.no_grad():
+            outputs = emo_model(**inputs)
+            probabilities = torch.nn.functional.softmax(outputs.logits[0], dim=-1)
+        labels = getattr(emo_model.config, "id2label", {}) or {}
+        top_k = min(5, len(labels))
+        top_probs, top_indices = torch.topk(probabilities, top_k)
+        all_emotions = []
+        for prob, idx in zip(top_probs, top_indices):
+            label_id = idx.item()
+            label = labels.get(label_id, f"label_{label_id}")
+            confidence = prob.item()
+            if confidence > 0.1:
+                all_emotions.append({"label": label, "confidence": float(confidence)})
+        top = all_emotions[0] if all_emotions else None
+        return {
+            "emotion_label": top["label"] if top else None,
+            "confidence": top["confidence"] if top else None,
+            "all_emotions": all_emotions,
+            "model": model,
+        }
+    def unload_model(self, model_name: str) -> None:
+        """Clear cached pipeline/model (simplified: clear if name matches)."""
+        if model_name == DEFAULT_URL_CLASSIFICATION_MODEL or "website-classifier" in (model_name or ""):
+            with self._url_lock:
+                self._url_pipeline = None
+        if model_name == DEFAULT_EMOTION_MODEL or "go_emotions" in (model_name or ""):
+            with self._emotion_lock:
+                self._emotion_model = None
+                self._emotion_tokenizer = None
+                self._emotion_loaded = False

topos/engine/backends/ollama.py ADDED Viewed

@@ -0,0 +1,181 @@
+"""Ollama backend adapter: HTTP API for local LLM inference."""
+from __future__ import annotations
+import json
+import logging
+import urllib.request
+import urllib.error
+from typing import Any, Dict, List, Optional
+logger = logging.getLogger("topos.engine.ollama")
+class OllamaAdapter:
+    """BackendAdapter for Ollama (http://localhost:11434)."""
+    def __init__(self, base_url: Optional[str] = None) -> None:
+        if base_url is None:
+            try:
+                from ...config.settings import settings
+                base_url = getattr(settings, "engine_ollama_base_url", None) or "http://localhost:11434"
+            except Exception:
+                base_url = "http://localhost:11434"
+        self._base_url = str(base_url).rstrip("/")
+    def list_models(self) -> List[str]:
+        """Return list of model names available on the server (from /api/tags)."""
+        req = urllib.request.Request(f"{self._base_url}/api/tags", method="GET")
+        try:
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                data = json.loads(resp.read().decode())
+                return [m.get("name", "") for m in data.get("models", []) if m.get("name")]
+        except Exception:
+            return []
+    def pull_model(self, model_name: str, *, stream: bool = True) -> None:
+        """Download the model from the registry. Logs progress when stream=True. Raises on failure."""
+        body = {"model": model_name, "stream": stream}
+        req = urllib.request.Request(
+            f"{self._base_url}/api/pull",
+            data=json.dumps(body).encode("utf-8"),
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        logger.info("Downloading model %s (ollama)...", model_name)
+        with urllib.request.urlopen(req, timeout=3600) as resp:
+            if stream:
+                last_pct = -1
+                for line in resp:
+                    if not line.strip():
+                        continue
+                    try:
+                        event = json.loads(line.decode())
+                    except json.JSONDecodeError:
+                        continue
+                    status = event.get("status", "")
+                    total = event.get("total") or 0
+                    completed = event.get("completed") or 0
+                    if total and total > 0 and completed >= 0:
+                        pct = min(100, int(100 * completed / total))
+                        if pct != last_pct and (pct % 10 == 0 or pct == 100):
+                            last_pct = pct
+                            total_mb = total / (1024 * 1024)
+                            done_mb = completed / (1024 * 1024)
+                            bar_len = 10
+                            filled = int(bar_len * pct / 100) if pct < 100 else bar_len
+                            bar = "=" * filled + ">" * (1 if filled < bar_len and pct > 0 else 0) + " " * (bar_len - filled - (1 if filled < bar_len and pct > 0 else 0))
+                            logger.info(
+                                "Pulling model %s: [%s] %d%% (%.1f / %.1f MB)",
+                                model_name, bar[:bar_len], pct, done_mb, total_mb,
+                            )
+                    elif status:
+                        logger.debug("Pulling model %s: %s", model_name, status)
+                logger.info("Model %s (ollama) pull complete.", model_name)
+            else:
+                json.loads(resp.read().decode())
+                logger.info("Model %s (ollama) pull complete.", model_name)
+    def delete_model(self, model_name: str) -> None:
+        """Remove the model from the server. Raises on failure."""
+        req = urllib.request.Request(
+            f"{self._base_url}/api/delete",
+            data=json.dumps({"model": model_name}).encode("utf-8"),
+            headers={"Content-Type": "application/json"},
+            method="DELETE",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                resp.read()
+        except urllib.error.HTTPError as e:
+            if e.code != 404:
+                raise
+    def ensure_model(self, model_name: str) -> bool:
+        """
+        Ensure the model is available: pull if not present.
+        Returns True if we pulled the model (caller may want to remove it later), False if already present.
+        Logs download start and progress (when streaming).
+        """
+        names = self.list_models()
+        for n in names:
+            if n == model_name or model_name in n or (model_name.split(":")[0] == n.split(":")[0] if ":" in n else n == model_name.split(":")[0]):
+                return False
+        self.pull_model(model_name, stream=True)
+        return True
+    def load_model(self, model_name: str, config: Optional[Dict[str, Any]] = None) -> None:
+        """Load model into memory by running a minimal generate. Idempotent if already loaded."""
+        self._generate(model_name, " ", num_predict=1, keep_alive=None)
+    def run_inference(self, payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        """Call Ollama /api/generate; map payload to prompt and parse response."""
+        config = config or {}
+        subtype = config.get("subtype") or ""
+        model = config.get("model") or "llama3.2:3b"
+        text = payload.get("text") or payload.get("content") or payload.get("url") or ""
+        try:
+            if subtype == "emotion_classification" or subtype == "emo_27":
+                prompt = (
+                    f'Classify the emotion of this text in one word or short phrase. '
+                    f'Reply with JSON only: {{"emotion_label": "...", "confidence": 0.9}}\n\nText: {text}'
+                )
+            else:
+                prompt = str(payload) if payload else ""
+            response_text = self._generate(model, prompt, num_predict=None, keep_alive=None)
+            out = self._parse_response(response_text, subtype, model)
+            out["model"] = model
+            return out
+        except Exception as e:
+            return {"error": str(e), "model": model, "emotion_label": None, "confidence": None, "all_emotions": []}
+    def _generate(
+        self,
+        model: str,
+        prompt: str,
+        *,
+        num_predict: Optional[int] = None,
+        keep_alive: Optional[str] = None,
+    ) -> str:
+        body: Dict[str, Any] = {"model": model, "prompt": prompt, "stream": False}
+        if keep_alive is not None:
+            body["keep_alive"] = keep_alive
+        if num_predict is not None:
+            body["options"] = {"num_predict": num_predict}
+        req = urllib.request.Request(
+            f"{self._base_url}/api/generate",
+            data=json.dumps(body).encode("utf-8"),
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=60) as resp:
+                data = json.loads(resp.read().decode())
+                return data.get("response", "")
+        except urllib.error.URLError as e:
+            raise RuntimeError(f"Ollama request failed: {e}") from e
+    def _parse_response(self, response_text: str, subtype: str, model: str) -> Dict[str, Any]:
+        """Try to parse JSON from response; else return raw."""
+        response_text = (response_text or "").strip()
+        if subtype in ("emotion_classification", "emo_27"):
+            try:
+                # Try to find JSON in the response
+                start = response_text.find("{")
+                if start >= 0:
+                    end = response_text.rfind("}") + 1
+                    if end > start:
+                        obj = json.loads(response_text[start:end])
+                        return {
+                            "emotion_label": obj.get("emotion_label"),
+                            "confidence": obj.get("confidence"),
+                            "all_emotions": [{"label": obj.get("emotion_label"), "confidence": obj.get("confidence", 0)}],
+                        }
+            except (json.JSONDecodeError, KeyError):
+                pass
+            return {"emotion_label": response_text[:100] if response_text else None, "confidence": None, "all_emotions": []}
+        return {"output": response_text}
+    def unload_model(self, model_name: str) -> None:
+        """Unload model from memory by sending a minimal generate with keep_alive=0."""
+        self._generate(model_name, " ", num_predict=1, keep_alive="0")

topos/engine/backends/stub.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""Stub backend adapter for when no real backend is configured."""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+class StubBackendAdapter:
+    """Stub adapter: no real inference, returns fixed dict."""
+    def load_model(self, model_name: str, config: Optional[Dict[str, Any]] = None) -> None:
+        pass
+    def run_inference(self, payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        return {"status": "stub", "message": "No backend configured; use Sprint 02+ for real inference"}
+    def unload_model(self, model_name: str) -> None:
+        pass
+def get_stub_adapter() -> StubBackendAdapter:
+    return StubBackendAdapter()