PyPI - tribalmemory - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tribalmemory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

tribalmemory/__init__.py +3 -0
tribalmemory/a21/__init__.py +38 -0
tribalmemory/a21/config/__init__.py +20 -0
tribalmemory/a21/config/providers.py +104 -0
tribalmemory/a21/config/system.py +184 -0
tribalmemory/a21/container/__init__.py +8 -0
tribalmemory/a21/container/container.py +212 -0
tribalmemory/a21/providers/__init__.py +32 -0
tribalmemory/a21/providers/base.py +241 -0
tribalmemory/a21/providers/deduplication.py +99 -0
tribalmemory/a21/providers/lancedb.py +232 -0
tribalmemory/a21/providers/memory.py +128 -0
tribalmemory/a21/providers/mock.py +54 -0
tribalmemory/a21/providers/openai.py +151 -0
tribalmemory/a21/providers/timestamp.py +88 -0
tribalmemory/a21/system.py +293 -0
tribalmemory/cli.py +298 -0
tribalmemory/interfaces.py +306 -0
tribalmemory/mcp/__init__.py +9 -0
tribalmemory/mcp/__main__.py +6 -0
tribalmemory/mcp/server.py +484 -0
tribalmemory/performance/__init__.py +1 -0
tribalmemory/performance/benchmarks.py +285 -0
tribalmemory/performance/corpus_generator.py +171 -0
tribalmemory/portability/__init__.py +1 -0
tribalmemory/portability/embedding_metadata.py +320 -0
tribalmemory/server/__init__.py +9 -0
tribalmemory/server/__main__.py +6 -0
tribalmemory/server/app.py +187 -0
tribalmemory/server/config.py +115 -0
tribalmemory/server/models.py +206 -0
tribalmemory/server/routes.py +378 -0
tribalmemory/services/__init__.py +15 -0
tribalmemory/services/deduplication.py +115 -0
tribalmemory/services/embeddings.py +273 -0
tribalmemory/services/import_export.py +506 -0
tribalmemory/services/memory.py +275 -0
tribalmemory/services/vector_store.py +360 -0
tribalmemory/testing/__init__.py +22 -0
tribalmemory/testing/embedding_utils.py +110 -0
tribalmemory/testing/fixtures.py +123 -0
tribalmemory/testing/metrics.py +256 -0
tribalmemory/testing/mocks.py +560 -0
tribalmemory/testing/semantic_expansions.py +91 -0
tribalmemory/utils.py +23 -0
tribalmemory-0.1.0.dist-info/METADATA +275 -0
tribalmemory-0.1.0.dist-info/RECORD +51 -0
tribalmemory-0.1.0.dist-info/WHEEL +5 -0
tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
tribalmemory-0.1.0.dist-info/top_level.txt +1 -0

tribalmemory/portability/embedding_metadata.py ADDED Viewed

@@ -0,0 +1,320 @@
+"""Embedding metadata and portability utilities.
+Tracks embedding model info in export bundles so that imports can detect
+model mismatches and handle re-embedding when needed.
+Strategy chosen for Issue #5: **Metadata + optional re-embedding**.
+- Every export bundle includes an EmbeddingMetadata block documenting
+  which model/dimensions produced the embeddings.
+- On import, the system compares source and target metadata.
+- Three strategies: KEEP (use as-is), DROP (clear for re-generation),
+  AUTO (keep if compatible, drop if not).
+"""
+from __future__ import annotations
+import copy
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+from ..interfaces import MemoryEntry
+class ReembeddingStrategy(Enum):
+    """How an import treats the embedding vectors carried by a bundle."""
+    # Retain the source vectors even when the embedding models differ.
+    KEEP = "keep"
+    # Discard the vectors; the caller has to re-embed afterwards.
+    DROP = "drop"
+    # Retain when source and target are compatible, discard otherwise.
+    AUTO = "auto"
+@dataclass
+class EmbeddingMetadata:
+    """Describes the embedding model that produced a set of vectors.
+    Attributes:
+        model_name: Model identifier (e.g. "text-embedding-3-small").
+        dimensions: Length of each embedding vector.
+        provider: Optional provider name (e.g. "openai", "sentence-transformers").
+        created_at: Optional timestamp string for when this metadata was created.
+    """
+    model_name: str
+    dimensions: int
+    provider: Optional[str] = None
+    created_at: Optional[str] = None
+    def is_compatible_with(self, other: EmbeddingMetadata) -> bool:
+        """Report whether vectors from both configurations are directly comparable.
+        Only the model name and the dimension count are compared; provider
+        and created_at play no part in the decision.
+        """
+        if self.model_name != other.model_name:
+            return False
+        return self.dimensions == other.dimensions
+    def to_dict(self) -> dict:
+        """Build a JSON-ready dict; optional fields that are None are left out."""
+        payload: dict = {
+            "model_name": self.model_name,
+            "dimensions": self.dimensions,
+        }
+        optional_fields = (
+            ("provider", self.provider),
+            ("created_at", self.created_at),
+        )
+        for key, value in optional_fields:
+            if value is not None:
+                payload[key] = value
+        return payload
+    @classmethod
+    def from_dict(cls, d: dict) -> EmbeddingMetadata:
+        """Rebuild an instance from a dict shaped like the output of to_dict().
+        "model_name" and "dimensions" are required keys (KeyError when
+        absent); "provider" and "created_at" default to None.
+        """
+        name = d["model_name"]
+        dims = d["dimensions"]
+        return cls(
+            model_name=name,
+            dimensions=dims,
+            provider=d.get("provider"),
+            created_at=d.get("created_at"),
+        )
+@dataclass
+class EmbeddingManifest:
+    """Manifest carried by a portable bundle.
+    Pairs the schema version and the memory count with the embedding
+    metadata so an importer can decide whether the vectors are usable as-is.
+    """
+    schema_version: str
+    embedding_metadata: EmbeddingMetadata
+    memory_count: int
+    exported_at: Optional[str] = None
+    def to_dict(self) -> dict:
+        """Build a JSON-ready dict; the metadata is nested under "embedding"."""
+        payload: dict = {
+            "schema_version": self.schema_version,
+            "embedding": self.embedding_metadata.to_dict(),
+            "memory_count": self.memory_count,
+        }
+        # A falsy exported_at (None or an empty string) is omitted.
+        if not self.exported_at:
+            return payload
+        payload["exported_at"] = self.exported_at
+        return payload
+    @classmethod
+    def from_dict(cls, d: dict) -> EmbeddingManifest:
+        """Rebuild a manifest from a dict shaped like the output of to_dict().
+        "schema_version", "embedding" and "memory_count" are required keys;
+        "exported_at" defaults to None.
+        """
+        version = d["schema_version"]
+        embedding = EmbeddingMetadata.from_dict(d["embedding"])
+        count = d["memory_count"]
+        return cls(
+            schema_version=version,
+            embedding_metadata=embedding,
+            memory_count=count,
+            exported_at=d.get("exported_at"),
+        )
+@dataclass
+class PortableBundle:
+    """Memory entries packaged together with their manifest.
+    The manifest carries the embedding metadata; the whole bundle converts
+    to and from plain dicts so it can be written out as JSON.
+    """
+    manifest: EmbeddingManifest
+    entries: list[MemoryEntry] = field(default_factory=list)
+    def to_dict(self) -> dict:
+        """Convert the manifest and every entry to plain dicts."""
+        manifest_payload = self.manifest.to_dict()
+        entry_payloads = [_entry_to_dict(item) for item in self.entries]
+        return {"manifest": manifest_payload, "entries": entry_payloads}
+    @classmethod
+    def from_dict(cls, d: dict) -> PortableBundle:
+        """Rebuild a bundle from a dict; a missing "entries" key means no entries."""
+        manifest = EmbeddingManifest.from_dict(d["manifest"])
+        raw_entries = d.get("entries", [])
+        entries = [_entry_from_dict(raw) for raw in raw_entries]
+        return cls(manifest=manifest, entries=entries)
+@dataclass
+class ImportResult:
+    """Outcome of importing a portable bundle.
+    Attributes:
+        entries: The imported memory entries; their embeddings may be None
+            when the vectors were dropped.
+        needs_embedding: True when embeddings were cleared, meaning the
+            caller must re-embed the entries before they can be searched.
+        source_metadata: Embedding metadata taken from the source bundle.
+        target_metadata: Embedding metadata of the target system.
+        strategy_used: The re-embedding strategy that was applied.
+    """
+    entries: list[MemoryEntry]
+    needs_embedding: bool
+    source_metadata: EmbeddingMetadata
+    target_metadata: EmbeddingMetadata
+    strategy_used: ReembeddingStrategy
+def create_embedding_metadata(
+    model_name: str,
+    dimensions: int,
+    provider: Optional[str] = None,
+) -> EmbeddingMetadata:
+    """Build EmbeddingMetadata stamped with the current UTC time (ISO 8601)."""
+    stamp = datetime.now(timezone.utc).isoformat()
+    return EmbeddingMetadata(
+        model_name=model_name,
+        dimensions=dimensions,
+        provider=provider,
+        created_at=stamp,
+    )
+def needs_reembedding(
+    source: EmbeddingMetadata,
+    target: EmbeddingMetadata,
+) -> bool:
+    """Tell whether imported vectors have to be regenerated.
+    Returns True when source.is_compatible_with(target) reports that the
+    two configurations are not compatible.
+    """
+    compatible = source.is_compatible_with(target)
+    return not compatible
+def create_portable_bundle(
+    entries: list[MemoryEntry],
+    embedding_metadata: EmbeddingMetadata,
+    schema_version: str = "1.0",
+) -> PortableBundle:
+    """Package memory entries and their embedding metadata into a bundle.
+    Each entry that carries a non-empty embedding is checked against the
+    declared dimension count; entries without an embedding are accepted
+    unchecked. The bundle holds its own list of the entries and a manifest
+    stamped with the current UTC time.
+    Raises:
+        ValueError: If an entry's embedding length differs from
+            embedding_metadata.dimensions.
+    """
+    for entry in entries:
+        # Nothing to validate when the entry has no (or an empty) vector.
+        if not entry.embedding:
+            continue
+        if len(entry.embedding) == embedding_metadata.dimensions:
+            continue
+        raise ValueError(
+            f"Entry {entry.id} has {len(entry.embedding)} dimensions, "
+            f"expected {embedding_metadata.dimensions}"
+        )
+    exported_at = datetime.now(timezone.utc).isoformat()
+    manifest = EmbeddingManifest(
+        schema_version=schema_version,
+        embedding_metadata=embedding_metadata,
+        memory_count=len(entries),
+        exported_at=exported_at,
+    )
+    return PortableBundle(manifest=manifest, entries=list(entries))
+SUPPORTED_SCHEMA_VERSIONS = {"1.0"}
+def import_bundle(
+    bundle: PortableBundle,
+    target_metadata: EmbeddingMetadata,
+    strategy: ReembeddingStrategy = ReembeddingStrategy.AUTO,
+) -> ImportResult:
+    """Import a portable bundle, applying the chosen re-embedding strategy.
+    Args:
+        bundle: The bundle to import; it is not modified.
+        target_metadata: Embedding metadata of the target system.
+        strategy: What to do with the bundle's embeddings (KEEP, DROP or AUTO).
+    Returns:
+        ImportResult holding copies of the entries; their embeddings are
+        set to None when the strategy led to dropping them.
+    Raises:
+        ValueError: If the bundle's schema version is not in
+            SUPPORTED_SCHEMA_VERSIONS.
+    """
+    manifest = bundle.manifest
+    version = manifest.schema_version
+    if version not in SUPPORTED_SCHEMA_VERSIONS:
+        raise ValueError(
+            f"Unsupported schema version '{version}'. "
+            f"Supported: {SUPPORTED_SCHEMA_VERSIONS}"
+        )
+    source_meta = manifest.embedding_metadata
+    compatible = source_meta.is_compatible_with(target_metadata)
+    # Work on copies so the caller's bundle is left untouched.
+    imported_entries = [_copy_entry(original) for original in bundle.entries]
+    # DROP always clears; AUTO clears only on a mismatch; KEEP never clears.
+    should_drop = strategy == ReembeddingStrategy.DROP or (
+        strategy == ReembeddingStrategy.AUTO and not compatible
+    )
+    if should_drop:
+        for entry in imported_entries:
+            entry.embedding = None
+    return ImportResult(
+        entries=imported_entries,
+        needs_embedding=should_drop,
+        source_metadata=source_meta,
+        target_metadata=target_metadata,
+        strategy_used=strategy,
+    )
+# --- Serialization helpers for MemoryEntry ---
+def _entry_to_dict(entry: MemoryEntry) -> dict:
+    """Convert a MemoryEntry into a plain dict.
+    The source type is stored by its enum value and both timestamps as
+    ISO 8601 strings; every other field is passed through unchanged.
+    """
+    source_type_value = entry.source_type.value
+    created_iso = entry.created_at.isoformat()
+    updated_iso = entry.updated_at.isoformat()
+    return {
+        "id": entry.id,
+        "content": entry.content,
+        "embedding": entry.embedding,
+        "source_instance": entry.source_instance,
+        "source_type": source_type_value,
+        "created_at": created_iso,
+        "updated_at": updated_iso,
+        "tags": entry.tags,
+        "context": entry.context,
+        "confidence": entry.confidence,
+        "supersedes": entry.supersedes,
+        "related_to": entry.related_to,
+    }
+def _entry_from_dict(d: dict) -> MemoryEntry:
+    """Build a MemoryEntry from a dict shaped like the output of _entry_to_dict().
+    "id", "content", "created_at" and "updated_at" are required keys; the
+    remaining keys fall back to defaults when absent.
+    """
+    from ..interfaces import MemorySource
+    entry_id = d["id"]
+    text = d["content"]
+    vector = d.get("embedding")
+    origin = d.get("source_instance", "unknown")
+    origin_type = MemorySource(d.get("source_type", "unknown"))
+    # Timestamps are stored as ISO 8601 strings.
+    created = datetime.fromisoformat(d["created_at"])
+    updated = datetime.fromisoformat(d["updated_at"])
+    return MemoryEntry(
+        id=entry_id,
+        content=text,
+        embedding=vector,
+        source_instance=origin,
+        source_type=origin_type,
+        created_at=created,
+        updated_at=updated,
+        tags=d.get("tags", []),
+        context=d.get("context"),
+        confidence=d.get("confidence", 1.0),
+        supersedes=d.get("supersedes"),
+        related_to=d.get("related_to", []),
+    )
+def _copy_entry(entry: MemoryEntry) -> MemoryEntry:
+    """Return a new MemoryEntry whose list fields are fresh copies.
+    The embedding, tags and related_to lists are duplicated so changes to
+    the copy do not reach the original; a missing or empty embedding
+    becomes None. All other fields are carried over as-is.
+    """
+    if entry.embedding:
+        embedding_copy = list(entry.embedding)
+    else:
+        embedding_copy = None
+    tags_copy = list(entry.tags)
+    related_copy = list(entry.related_to)
+    return MemoryEntry(
+        id=entry.id,
+        content=entry.content,
+        embedding=embedding_copy,
+        source_instance=entry.source_instance,
+        source_type=entry.source_type,
+        created_at=entry.created_at,
+        updated_at=entry.updated_at,
+        tags=tags_copy,
+        context=entry.context,
+        confidence=entry.confidence,
+        supersedes=entry.supersedes,
+        related_to=related_copy,
+    )

tribalmemory/server/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""Tribal Memory HTTP Server.
+FastAPI-based HTTP interface for tribal-memory service.
+Designed for integration with OpenClaw's memory-tribal extension.
+"""
+from .app import create_app, run_server
+__all__ = ["create_app", "run_server"]

tribalmemory/server/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow running as: python -m tribalmemory.server"""
+from .app import main
+if __name__ == "__main__":
+    main()

tribalmemory/server/app.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""FastAPI application for tribal-memory service."""
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+from typing import Optional
+import uvicorn
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from ..services import create_memory_service, TribalMemoryService
+from .config import TribalMemoryConfig
+from .routes import router
+# Global service instance (set during lifespan)
+_memory_service: Optional[TribalMemoryService] = None
+_instance_id: Optional[str] = None
+logger = logging.getLogger("tribalmemory.server")
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan manager.
+    On startup, validates the configuration stored on app.state.config and
+    creates the module-level memory service from it. On shutdown, resets
+    the module-level service and instance id to None.
+    Raises:
+        ValueError: If the configuration reports validation errors.
+    """
+    global _memory_service, _instance_id
+    config: TribalMemoryConfig = app.state.config
+    # Validate config
+    errors = config.validate()
+    if errors:
+        raise ValueError(f"Configuration errors: {errors}")
+    logger.info(f"Starting tribal-memory service (instance: {config.instance_id})")
+    # Create memory service
+    _instance_id = config.instance_id
+    _memory_service = create_memory_service(
+        instance_id=config.instance_id,
+        db_path=config.db.path,
+        openai_api_key=config.embedding.api_key,
+        api_base=config.embedding.api_base,
+        embedding_model=config.embedding.model,
+        embedding_dimensions=config.embedding.dimensions,
+    )
+    logger.info(f"Memory service initialized (db: {config.db.path})")
+    try:
+        yield
+    finally:
+        # Cleanup runs even when an exception is raised into the context
+        # manager, so the module-level references are always reset.
+        logger.info("Shutting down tribal-memory service")
+        _memory_service = None
+        _instance_id = None
+def create_app(config: Optional[TribalMemoryConfig] = None) -> FastAPI:
+    """Build the FastAPI application.
+    Args:
+        config: Service configuration; when None it is loaded via
+            TribalMemoryConfig.from_env().
+    Returns:
+        The application with CORS middleware, the API router and a root
+        endpoint registered.
+    """
+    resolved = TribalMemoryConfig.from_env() if config is None else config
+    app = FastAPI(
+        title="Tribal Memory",
+        description="Long-term memory service for AI agents with provenance tracking",
+        version="0.1.0",
+        lifespan=lifespan,
+    )
+    # The lifespan handler reads the configuration back from app.state.
+    app.state.config = resolved
+    # CORS is limited to localhost origins. A regex is used so that any port
+    # matches - OpenClaw Gateway runs on user-configurable ports
+    # (default 18789). With the default server host of 127.0.0.1 only local
+    # processes can reach the service anyway.
+    cors_options = dict(
+        allow_origin_regex=r"http://(localhost|127\.0\.0\.1)(:\d+)?",
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    app.add_middleware(CORSMiddleware, **cors_options)
+    # API routes
+    app.include_router(router)
+    # Root endpoint: a small service descriptor
+    @app.get("/")
+    async def root():
+        info = {
+            "service": "tribal-memory",
+            "version": "0.1.0",
+            "docs": "/docs",
+        }
+        return info
+    return app
+def run_server(
+    config: Optional[TribalMemoryConfig] = None,
+    host: Optional[str] = None,
+    port: Optional[int] = None,
+    log_level: str = "info",
+):
+    """Run the HTTP server.
+    Creates the parent directory of the database path if needed, builds
+    the application and hands it to uvicorn.
+    Args:
+        config: Service configuration. If None, loads from environment.
+        host: Override host from config.
+        port: Override port from config. Any explicit value, including 0,
+            takes precedence over the configured port.
+        log_level: Logging level.
+    """
+    if config is None:
+        config = TribalMemoryConfig.from_env()
+    # Ensure db directory exists
+    db_path = Path(config.db.path)
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    app = create_app(config)
+    bind_host = host or config.server.host
+    # Compare against None rather than truthiness so an explicit port of 0
+    # is not silently replaced by the configured port.
+    bind_port = port if port is not None else config.server.port
+    uvicorn.run(
+        app,
+        host=bind_host,
+        port=bind_port,
+        log_level=log_level,
+    )
+# CLI entry point
+def main():
+    """Parse the command line, load the configuration and start the server."""
+    import argparse
+    cli = argparse.ArgumentParser(description="Tribal Memory HTTP Server")
+    cli.add_argument(
+        "--config", "-c",
+        type=str,
+        default=None,
+        help="Path to config file (default: ~/.tribal-memory/config.yaml)",
+    )
+    cli.add_argument(
+        "--host",
+        type=str,
+        default=None,
+        help="Host to bind to (default: 127.0.0.1)",
+    )
+    cli.add_argument(
+        "--port", "-p",
+        type=int,
+        default=None,
+        help="Port to bind to (default: 18790)",
+    )
+    cli.add_argument(
+        "--log-level",
+        type=str,
+        default="info",
+        choices=["debug", "info", "warning", "error"],
+        help="Logging level",
+    )
+    options = cli.parse_args()
+    # An explicit --config path wins; otherwise fall back to the environment.
+    config = (
+        TribalMemoryConfig.from_file(options.config)
+        if options.config
+        else TribalMemoryConfig.from_env()
+    )
+    run_server(
+        config=config,
+        host=options.host,
+        port=options.port,
+        log_level=options.log_level,
+    )
+if __name__ == "__main__":
+    main()

tribalmemory/server/config.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""Server configuration."""
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+import yaml
+@dataclass
+class EmbeddingConfig:
+    """Settings for the embedding service.
+    Works with OpenAI, Ollama, and any OpenAI-compatible embedding API.
+    Example for local Ollama embeddings (zero cloud, zero cost):
+        api_base: http://localhost:11434/v1
+        model: nomic-embed-text
+        dimensions: 768
+        # api_key not needed for local models
+    """
+    provider: str = "openai"
+    model: str = "text-embedding-3-small"
+    api_key: Optional[str] = None
+    api_base: Optional[str] = None
+    dimensions: int = 1536
+    def __post_init__(self):
+        # Any setting still None falls back to its environment variable
+        # (and stays None when that variable is unset).
+        env_fallbacks = (
+            ("api_key", "OPENAI_API_KEY"),
+            ("api_base", "TRIBAL_MEMORY_EMBEDDING_API_BASE"),
+        )
+        for attr, env_var in env_fallbacks:
+            if getattr(self, attr) is None:
+                setattr(self, attr, os.environ.get(env_var))
+@dataclass
+class DatabaseConfig:
+    """Settings for the database backend."""
+    provider: str = "lancedb"
+    path: str = "~/.tribal-memory/lancedb"
+    uri: Optional[str] = None  # For cloud
+    def __post_init__(self):
+        # Replace a leading "~" with the user's home directory.
+        expanded = Path(self.path).expanduser()
+        self.path = str(expanded)
+@dataclass
+class ServerConfig:
+    """Bind address for the HTTP server."""
+    # Loopback by default.
+    host: str = "127.0.0.1"
+    port: int = 18790
+@dataclass
+class TribalMemoryConfig:
+    """Full service configuration.
+    Groups the instance id with the database, embedding and HTTP server
+    settings, and provides loaders for YAML files, dicts and the environment.
+    """
+    instance_id: str = "default"
+    db: DatabaseConfig = field(default_factory=DatabaseConfig)
+    embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
+    server: ServerConfig = field(default_factory=ServerConfig)
+    @classmethod
+    def from_file(cls, path: str | Path) -> "TribalMemoryConfig":
+        """Load configuration from YAML file.
+        A leading "~" in the path is expanded. When the file does not
+        exist, a configuration with all defaults is returned; an empty
+        file is treated as an empty mapping.
+        """
+        path = Path(path).expanduser()
+        if not path.exists():
+            return cls()
+        # Read as UTF-8 explicitly rather than relying on the platform's
+        # locale-dependent default encoding.
+        with open(path, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
+        return cls.from_dict(data)
+    @classmethod
+    def from_dict(cls, data: dict) -> "TribalMemoryConfig":
+        """Create configuration from dictionary.
+        Reads the optional "instance_id", "db", "embedding" and "server"
+        keys; a missing or empty section falls back to that section's
+        defaults.
+        """
+        db_data = data.get("db", {})
+        embedding_data = data.get("embedding", {})
+        server_data = data.get("server", {})
+        return cls(
+            instance_id=data.get("instance_id", "default"),
+            db=DatabaseConfig(**db_data) if db_data else DatabaseConfig(),
+            embedding=EmbeddingConfig(**embedding_data) if embedding_data else EmbeddingConfig(),
+            server=ServerConfig(**server_data) if server_data else ServerConfig(),
+        )
+    @classmethod
+    def from_env(cls) -> "TribalMemoryConfig":
+        """Create configuration from environment variables.
+        The config file path comes from TRIBAL_MEMORY_CONFIG, defaulting
+        to ~/.tribal-memory/config.yaml, and is loaded via from_file().
+        """
+        config_path = os.environ.get(
+            "TRIBAL_MEMORY_CONFIG",
+            "~/.tribal-memory/config.yaml"
+        )
+        return cls.from_file(config_path)
+    def validate(self) -> list[str]:
+        """Validate configuration, return list of errors.
+        An empty list means the configuration passed both checks.
+        """
+        errors = []
+        # api_key is only required when using OpenAI (no custom api_base)
+        api_base = (self.embedding.api_base or "").strip()
+        # A non-empty api_base that does not mention api.openai.com is
+        # treated as a local/compatible endpoint that needs no key.
+        is_local = (
+            api_base != ""
+            and "api.openai.com" not in api_base.lower()
+        )
+        if not self.embedding.api_key and not is_local:
+            errors.append("embedding.api_key is required (or set OPENAI_API_KEY)")
+        if not self.instance_id:
+            errors.append("instance_id is required")
+        return errors