PyPI - tribalmemory - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tribalmemory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

tribalmemory/__init__.py +3 -0
tribalmemory/a21/__init__.py +38 -0
tribalmemory/a21/config/__init__.py +20 -0
tribalmemory/a21/config/providers.py +104 -0
tribalmemory/a21/config/system.py +184 -0
tribalmemory/a21/container/__init__.py +8 -0
tribalmemory/a21/container/container.py +212 -0
tribalmemory/a21/providers/__init__.py +32 -0
tribalmemory/a21/providers/base.py +241 -0
tribalmemory/a21/providers/deduplication.py +99 -0
tribalmemory/a21/providers/lancedb.py +232 -0
tribalmemory/a21/providers/memory.py +128 -0
tribalmemory/a21/providers/mock.py +54 -0
tribalmemory/a21/providers/openai.py +151 -0
tribalmemory/a21/providers/timestamp.py +88 -0
tribalmemory/a21/system.py +293 -0
tribalmemory/cli.py +298 -0
tribalmemory/interfaces.py +306 -0
tribalmemory/mcp/__init__.py +9 -0
tribalmemory/mcp/__main__.py +6 -0
tribalmemory/mcp/server.py +484 -0
tribalmemory/performance/__init__.py +1 -0
tribalmemory/performance/benchmarks.py +285 -0
tribalmemory/performance/corpus_generator.py +171 -0
tribalmemory/portability/__init__.py +1 -0
tribalmemory/portability/embedding_metadata.py +320 -0
tribalmemory/server/__init__.py +9 -0
tribalmemory/server/__main__.py +6 -0
tribalmemory/server/app.py +187 -0
tribalmemory/server/config.py +115 -0
tribalmemory/server/models.py +206 -0
tribalmemory/server/routes.py +378 -0
tribalmemory/services/__init__.py +15 -0
tribalmemory/services/deduplication.py +115 -0
tribalmemory/services/embeddings.py +273 -0
tribalmemory/services/import_export.py +506 -0
tribalmemory/services/memory.py +275 -0
tribalmemory/services/vector_store.py +360 -0
tribalmemory/testing/__init__.py +22 -0
tribalmemory/testing/embedding_utils.py +110 -0
tribalmemory/testing/fixtures.py +123 -0
tribalmemory/testing/metrics.py +256 -0
tribalmemory/testing/mocks.py +560 -0
tribalmemory/testing/semantic_expansions.py +91 -0
tribalmemory/utils.py +23 -0
tribalmemory-0.1.0.dist-info/METADATA +275 -0
tribalmemory-0.1.0.dist-info/RECORD +51 -0
tribalmemory-0.1.0.dist-info/WHEEL +5 -0
tribalmemory-0.1.0.dist-info/entry_points.txt +3 -0
tribalmemory-0.1.0.dist-info/licenses/LICENSE +190 -0
tribalmemory-0.1.0.dist-info/top_level.txt +1 -0

tribalmemory/a21/providers/mock.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""Mock providers for testing."""
+from datetime import datetime
+from typing import Optional
+from .base import (
+    EmbeddingProvider,
+    StorageProvider,
+    ProviderHealth,
+    ProviderStatus,
+)
+from ..config.providers import EmbeddingConfig
+from ...testing.embedding_utils import hash_to_embedding
+class MockEmbeddingProvider(EmbeddingProvider[EmbeddingConfig]):
+    """Mock embedding provider using deterministic hashing."""
+    def __init__(self, config: EmbeddingConfig):
+        super().__init__(config)
+    @property
+    def dimensions(self) -> int:
+        return self.config.dimensions
+    @property
+    def model_name(self) -> str:
+        return "mock-embedding"
+    async def initialize(self) -> None:
+        self._initialized = True
+    async def shutdown(self) -> None:
+        self._initialized = False
+    async def health_check(self) -> ProviderHealth:
+        return ProviderHealth(
+            status=ProviderStatus.HEALTHY,
+            latency_ms=0.1,
+            message="Mock provider always healthy"
+        )
+    async def embed(self, text: str) -> list[float]:
+        return self._hash_to_embedding(text)
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        return [self._hash_to_embedding(t) for t in texts]
+    def _hash_to_embedding(self, text: str) -> list[float]:
+        """Convert text to deterministic embedding that preserves semantic similarity.
+        Delegates to shared utility for consistent behavior across mock implementations.
+        """
+        return hash_to_embedding(text, self.dimensions)

tribalmemory/a21/providers/openai.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""OpenAI Embedding Provider."""
+import asyncio
+from datetime import datetime
+from typing import Optional
+import httpx
+from .base import EmbeddingProvider, ProviderHealth, ProviderStatus
+from ..config.providers import EmbeddingConfig
+from ...utils import normalize_embedding
+class OpenAIEmbeddingProvider(EmbeddingProvider[EmbeddingConfig]):
+    """OpenAI embedding provider implementation."""
+    API_URL = "https://api.openai.com/v1/embeddings"
+    def __init__(self, config: EmbeddingConfig):
+        super().__init__(config)
+        self._client: Optional[httpx.AsyncClient] = None
+    def __repr__(self) -> str:
+        """Safe repr that masks API key to prevent accidental logging."""
+        return f"OpenAIEmbeddingProvider(model={self.config.model!r}, api_key=***)"
+    @property
+    def dimensions(self) -> int:
+        return self.config.dimensions
+    @property
+    def model_name(self) -> str:
+        return self.config.model
+    async def initialize(self) -> None:
+        """Initialize the OpenAI client.
+        Creates an async HTTP client for API requests.
+        Ensures cleanup if initialization fails partway through.
+        Raises:
+            ValueError: If API key is not configured
+        """
+        if not self.config.api_key:
+            raise ValueError("OpenAI API key required")
+        client = None
+        try:
+            client = httpx.AsyncClient(
+                timeout=httpx.Timeout(self.config.timeout_seconds),
+                headers={
+                    "Authorization": f"Bearer {self.config.api_key}",
+                    "Content-Type": "application/json",
+                }
+            )
+            self._client = client
+            self._initialized = True
+        except Exception:
+            # Ensure cleanup on partial initialization failure
+            if client:
+                await client.aclose()
+            raise
+    async def shutdown(self) -> None:
+        if self._client:
+            await self._client.aclose()
+            self._client = None
+        self._initialized = False
+    async def health_check(self) -> ProviderHealth:
+        if not self._client:
+            return ProviderHealth(
+                status=ProviderStatus.UNAVAILABLE,
+                message="Client not initialized"
+            )
+        try:
+            start = datetime.utcnow()
+            await self.embed("health check")
+            latency = (datetime.utcnow() - start).total_seconds() * 1000
+            return ProviderHealth(
+                status=ProviderStatus.HEALTHY,
+                latency_ms=latency
+            )
+        except Exception as e:
+            return ProviderHealth(
+                status=ProviderStatus.UNAVAILABLE,
+                message=str(e)
+            )
+    async def embed(self, text: str) -> list[float]:
+        results = await self.embed_batch([text])
+        return results[0]
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        if not texts:
+            return []
+        if not self._client:
+            raise RuntimeError("Provider not initialized")
+        # Clean texts
+        cleaned = [self._clean_text(t) for t in texts]
+        payload = {
+            "model": self.config.model,
+            "input": cleaned,
+            "dimensions": self.config.dimensions,
+        }
+        last_error = None
+        for attempt in range(self.config.max_retries):
+            try:
+                response = await self._client.post(self.API_URL, json=payload)
+                if response.status_code == 200:
+                    data = response.json()
+                    embeddings = sorted(data["data"], key=lambda x: x["index"])
+                    return [normalize_embedding(e["embedding"]) for e in embeddings]
+                elif response.status_code == 429:
+                    retry_after = int(response.headers.get("Retry-After", 5))
+                    await asyncio.sleep(retry_after)
+                    continue
+                elif response.status_code >= 500:
+                    backoff = min(self.config.backoff_base ** attempt, self.config.backoff_max)
+                    await asyncio.sleep(backoff)
+                    continue
+                else:
+                    error = response.json().get("error", {}).get("message", response.text)
+                    raise RuntimeError(f"OpenAI API error ({response.status_code}): {error}")
+            except httpx.TimeoutException as e:
+                last_error = e
+                backoff = min(self.config.backoff_base ** attempt, self.config.backoff_max)
+                await asyncio.sleep(backoff)
+            except httpx.RequestError as e:
+                last_error = e
+                backoff = min(self.config.backoff_base ** attempt, self.config.backoff_max)
+                await asyncio.sleep(backoff)
+        raise RuntimeError(f"OpenAI API failed after {self.config.max_retries} retries: {last_error}")
+    def _clean_text(self, text: str) -> str:
+        cleaned = " ".join(text.split())
+        max_bytes = 8191 * 4
+        encoded = cleaned.encode('utf-8')
+        if len(encoded) > max_bytes:
+            cleaned = encoded[:max_bytes].decode('utf-8', errors='ignore')
+        return cleaned

tribalmemory/a21/providers/timestamp.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""Timestamp providers."""
+import hashlib
+from datetime import datetime
+from typing import Optional
+from .base import TimestampProvider, ProviderHealth, ProviderStatus
+from ..config.providers import TimestampConfig
+class RFC3161TimestampProvider(TimestampProvider[TimestampConfig]):
+    """RFC 3161 Time Stamp Authority provider.
+    TODO: Implement actual RFC 3161 integration.
+    For now, this is a placeholder that matches the interface.
+    """
+    async def initialize(self) -> None:
+        if not self.config.tsa_url:
+            raise ValueError("TSA URL required for RFC 3161 provider")
+        self._initialized = True
+    async def shutdown(self) -> None:
+        self._initialized = False
+    async def health_check(self) -> ProviderHealth:
+        # TODO: Actually ping the TSA
+        return ProviderHealth(
+            status=ProviderStatus.HEALTHY,
+            message=f"RFC 3161 TSA at {self.config.tsa_url}"
+        )
+    async def timestamp(self, data: bytes) -> bytes:
+        # TODO: Implement actual RFC 3161 timestamp request
+        raise NotImplementedError("RFC 3161 implementation pending")
+    async def verify(self, data: bytes, token: bytes) -> tuple[bool, Optional[datetime]]:
+        # TODO: Implement actual RFC 3161 verification
+        raise NotImplementedError("RFC 3161 implementation pending")
+class MockTimestampProvider(TimestampProvider[TimestampConfig]):
+    """Mock timestamp provider for testing."""
+    def __init__(self, config: TimestampConfig):
+        super().__init__(config)
+        self._timestamps: dict[bytes, datetime] = {}
+    async def initialize(self) -> None:
+        self._initialized = True
+    async def shutdown(self) -> None:
+        self._timestamps.clear()
+        self._initialized = False
+    async def health_check(self) -> ProviderHealth:
+        return ProviderHealth(
+            status=ProviderStatus.HEALTHY,
+            message="Mock timestamp provider"
+        )
+    async def timestamp(self, data: bytes) -> bytes:
+        now = datetime.utcnow()
+        data_hash = hashlib.sha256(data).hexdigest()[:16]
+        token = f"MOCK_TSA|{now.isoformat()}|{data_hash}".encode()
+        self._timestamps[token] = now
+        return token
+    async def verify(self, data: bytes, token: bytes) -> tuple[bool, Optional[datetime]]:
+        try:
+            decoded = token.decode()
+            if not decoded.startswith("MOCK_TSA|"):
+                return False, None
+            parts = decoded.split("|")
+            if len(parts) != 3:
+                return False, None
+            timestamp_str = parts[1]
+            stored_hash = parts[2]
+            actual_hash = hashlib.sha256(data).hexdigest()[:16]
+            if actual_hash != stored_hash:
+                return False, None
+            return True, datetime.fromisoformat(timestamp_str)
+        except Exception:
+            return False, None

tribalmemory/a21/system.py ADDED Viewed

@@ -0,0 +1,293 @@
+"""Memory System - High-level API for A2.1.
+This is the main entry point for interacting with tribal memory.
+It provides a clean, high-level interface while delegating to
+the underlying providers through the container.
+"""
+import uuid
+from datetime import datetime
+from typing import Any, Optional
+from .config import SystemConfig
+from .container import Container
+from ..interfaces import MemoryEntry, MemorySource, RecallResult, StoreResult
+class MemorySystem:
+    """High-level memory system API.
+    This class provides a simple, clean interface for memory operations
+    while managing all the underlying complexity through the container.
+    Usage:
+        config = SystemConfig.from_env()
+        system = MemorySystem(config)
+        async with system:
+            await system.remember("Important fact")
+            results = await system.recall("What was that fact?")
+    Or manually:
+        system = MemorySystem(config)
+        await system.start()
+        try:
+            await system.remember("Important fact")
+        finally:
+            await system.stop()
+    """
+    def __init__(self, config: SystemConfig):
+        """Initialize memory system.
+        Args:
+            config: System configuration
+        """
+        self.config = config
+        self._container = Container(config)
+        self._started = False
+    async def start(self) -> None:
+        """Start the memory system."""
+        if self._started:
+            return
+        # Validate config
+        errors = self.config.validate()
+        if errors:
+            raise ValueError(f"Invalid configuration: {errors}")
+        await self._container.initialize()
+        self._started = True
+    async def stop(self) -> None:
+        """Stop the memory system."""
+        if not self._started:
+            return
+        await self._container.shutdown()
+        self._started = False
+    async def remember(
+        self,
+        content: str,
+        source_type: MemorySource = MemorySource.AUTO_CAPTURE,
+        context: Optional[str] = None,
+        tags: Optional[list[str]] = None,
+        skip_dedup: bool = False,
+    ) -> StoreResult:
+        """Store a new memory.
+        Args:
+            content: The memory content
+            source_type: How this memory was captured
+            context: Additional context about capture
+            tags: Tags for categorization
+            skip_dedup: Skip duplicate checking
+        Returns:
+            StoreResult with success status
+        """
+        self._ensure_started()
+        # Validate
+        if not content or not content.strip():
+            return StoreResult(success=False, error="Empty content not allowed")
+        content = content.strip()
+        # Generate embedding
+        try:
+            embedding = await self._container.embedding.embed(content)
+        except Exception as e:
+            return StoreResult(success=False, error=f"Embedding failed: {e}")
+        # Check for duplicates
+        if not skip_dedup and self._container.deduplication:
+            is_dup, dup_id = await self._container.deduplication.is_duplicate(
+                content, embedding
+            )
+            if is_dup:
+                return StoreResult(success=False, duplicate_of=dup_id)
+        # Create entry
+        entry = MemoryEntry(
+            id=str(uuid.uuid4()),
+            content=content,
+            embedding=embedding,
+            source_instance=self.config.instance_id,
+            source_type=source_type,
+            created_at=datetime.utcnow(),
+            updated_at=datetime.utcnow(),
+            tags=tags or [],
+            context=context,
+            confidence=1.0,
+        )
+        return await self._container.storage.store(entry)
+    async def recall(
+        self,
+        query: str,
+        limit: int = 5,
+        min_relevance: float = 0.7,
+        tags: Optional[list[str]] = None,
+    ) -> list[RecallResult]:
+        """Recall relevant memories.
+        Args:
+            query: Natural language query
+            limit: Maximum results
+            min_relevance: Minimum similarity score
+            tags: Filter by tags
+        Returns:
+            List of RecallResults sorted by relevance
+        """
+        self._ensure_started()
+        try:
+            query_embedding = await self._container.embedding.embed(query)
+        except Exception:
+            return []
+        filters = {"tags": tags} if tags else None
+        results = await self._container.storage.recall(
+            query_embedding,
+            limit=limit,
+            min_similarity=min_relevance,
+            filters=filters,
+        )
+        return self._filter_superseded(results)
+    async def correct(
+        self,
+        original_id: str,
+        corrected_content: str,
+        context: Optional[str] = None,
+    ) -> StoreResult:
+        """Store a correction to an existing memory.
+        Args:
+            original_id: ID of memory being corrected
+            corrected_content: The corrected information
+            context: Why this correction was made
+        Returns:
+            StoreResult for the correction entry
+        """
+        self._ensure_started()
+        # Verify original exists
+        original = await self._container.storage.get(original_id)
+        if not original:
+            return StoreResult(success=False, error=f"Original memory {original_id} not found")
+        # Generate embedding
+        try:
+            embedding = await self._container.embedding.embed(corrected_content)
+        except Exception as e:
+            return StoreResult(success=False, error=f"Embedding failed: {e}")
+        # Create correction entry
+        entry = MemoryEntry(
+            id=str(uuid.uuid4()),
+            content=corrected_content,
+            embedding=embedding,
+            source_instance=self.config.instance_id,
+            source_type=MemorySource.CORRECTION,
+            created_at=datetime.utcnow(),
+            updated_at=datetime.utcnow(),
+            tags=original.tags,
+            context=context or f"Correction of {original_id}",
+            confidence=1.0,
+            supersedes=original_id,
+            related_to=[original_id],
+        )
+        return await self._container.storage.store(entry)
+    async def forget(self, memory_id: str) -> bool:
+        """Forget (soft delete) a memory.
+        Args:
+            memory_id: ID of memory to forget
+        Returns:
+            True if forgotten successfully
+        """
+        self._ensure_started()
+        return await self._container.storage.delete(memory_id)
+    async def get(self, memory_id: str) -> Optional[MemoryEntry]:
+        """Get a specific memory by ID.
+        Args:
+            memory_id: Memory ID
+        Returns:
+            MemoryEntry or None if not found
+        """
+        self._ensure_started()
+        return await self._container.storage.get(memory_id)
+    async def health(self) -> dict[str, Any]:
+        """Check system health.
+        Returns:
+            Dict with provider health statuses including:
+            - status: "running" or "stopped"
+            - instance_id: This instance's ID
+            - providers: Dict of provider name to health info
+        """
+        if not self._started:
+            return {"status": "stopped"}
+        health = await self._container.health_check()
+        return {
+            "status": "running",
+            "instance_id": self.config.instance_id,
+            "providers": {
+                name: {"status": h.status.value, "latency_ms": h.latency_ms}
+                for name, h in health.items()
+            }
+        }
+    async def stats(self) -> dict[str, Any]:
+        """Get memory statistics.
+        Returns:
+            Dict with memory counts and breakdowns including:
+            - total_memories: Total count of active memories
+            - instance_id: This instance's ID
+        """
+        self._ensure_started()
+        total = await self._container.storage.count()
+        return {
+            "total_memories": total,
+            "instance_id": self.config.instance_id,
+        }
+    def _ensure_started(self) -> None:
+        if not self._started:
+            raise RuntimeError("MemorySystem not started. Call start() first.")
+    @staticmethod
+    def _filter_superseded(results: list[RecallResult]) -> list[RecallResult]:
+        """Remove memories that are superseded by corrections in the result set."""
+        superseded_ids = {
+            r.memory.supersedes for r in results if r.memory.supersedes
+        }
+        if not superseded_ids:
+            return results
+        return [r for r in results if r.memory.id not in superseded_ids]
+    async def __aenter__(self):
+        await self.start()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        await self.stop()