tribalmemory 0.1.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
+ """Result reranking for improved retrieval quality.
+
+ Provides multiple reranking strategies:
+ - NoopReranker: Pass-through, no reranking
+ - HeuristicReranker: Fast heuristic scoring (recency, tags, length)
+ - CrossEncoderReranker: Model-based reranking (sentence-transformers)
+
+ Reranking happens after initial retrieval (vector + BM25) to refine ordering.
+ """
+
+ import logging
+ import math
+ from datetime import datetime, timezone
+ from typing import TYPE_CHECKING, Protocol
+
+ from ..interfaces import RecallResult
+
+ if TYPE_CHECKING:
+     from ..server.config import SearchConfig
+
+ logger = logging.getLogger(__name__)
+
+ # Optional dependency: probe for sentence-transformers at import time
+ CROSS_ENCODER_AVAILABLE = False
+ CrossEncoder = None
+
+ try:
+     from sentence_transformers import CrossEncoder as _CrossEncoder
+     CrossEncoder = _CrossEncoder
+     CROSS_ENCODER_AVAILABLE = True
+ except ImportError:
+     pass
+
+
+ class IReranker(Protocol):
+     """Interface for result reranking."""
+
+     def rerank(
+         self, query: str, candidates: list[RecallResult], top_k: int
+     ) -> list[RecallResult]:
+         """Rerank candidates and return top_k results.
+
+         Args:
+             query: Original search query
+             candidates: Initial retrieval results
+             top_k: Number of results to return
+
+         Returns:
+             Reranked results (up to top_k)
+         """
+         ...
+
+
+ class NoopReranker:
+     """Pass-through reranker (no reranking)."""
+
+     def rerank(
+         self, query: str, candidates: list[RecallResult], top_k: int
+     ) -> list[RecallResult]:
+         """Return top_k candidates unchanged."""
+         return candidates[:top_k]
+
+
+ class HeuristicReranker:
+     """Heuristic reranking with recency, tag match, and length signals.
+
+     Combines multiple quality signals:
+     - Recency: newer memories score higher (exponential decay)
+     - Tag match: query terms matching tags boost score
+     - Length penalty: very short or very long content penalized
+
+     Final score: original_score * (1 + boost_sum)
+     """
+
+     def __init__(
+         self,
+         recency_decay_days: float = 30.0,
+         tag_boost_weight: float = 0.1,
+         min_length: int = 10,
+         max_length: int = 2000,
+         short_penalty: float = 0.05,
+         long_penalty: float = 0.03,
+     ):
+         """Initialize heuristic reranker.
+
+         Args:
+             recency_decay_days: Decay constant for the recency boost (days);
+                 the boost falls to 1/e of its initial value after this many days
+             tag_boost_weight: Weight for tag match boost
+             min_length: Content shorter than this gets penalty
+             max_length: Content longer than this gets penalty
+             short_penalty: Penalty for content shorter than min_length
+             long_penalty: Penalty for content longer than max_length
+         """
+         self.recency_decay_days = recency_decay_days
+         self.tag_boost_weight = tag_boost_weight
+         self.min_length = min_length
+         self.max_length = max_length
+         self.short_penalty = short_penalty
+         self.long_penalty = long_penalty
+
+     def rerank(
+         self, query: str, candidates: list[RecallResult], top_k: int
+     ) -> list[RecallResult]:
+         """Rerank using heuristic scoring."""
+         if not candidates:
+             return []
+
+         # Compute boost for each candidate
+         scored = []
+         query_lower = query.lower()
+         query_terms = set(query_lower.split())
+         now = datetime.now(timezone.utc)
+
+         for i, candidate in enumerate(candidates):
+             boost = 0.0
+
+             # Recency boost (exponential decay)
+             # Brand new memory (age=0) gets boost of 1.0, older memories decay exponentially
+             created_at = candidate.memory.created_at
+             if created_at.tzinfo is None:
+                 # Treat naive timestamps as UTC so aware/naive subtraction cannot raise
+                 created_at = created_at.replace(tzinfo=timezone.utc)
+             age_days = (now - created_at).total_seconds() / 86400
+             recency_boost = math.exp(-age_days / self.recency_decay_days)
+             boost += recency_boost
+
+             # Tag match boost (exact term matching, not substring)
+             if candidate.memory.tags:
+                 tag_lower = set(t.lower() for t in candidate.memory.tags)
+                 # Count query terms that exactly match tags
+                 matches = sum(1 for term in query_terms if term in tag_lower)
+                 if matches > 0:
+                     boost += self.tag_boost_weight * matches
+
+             # Length penalty
+             content_length = len(candidate.memory.content)
+             if content_length < self.min_length:
+                 boost -= self.short_penalty
+             elif content_length > self.max_length:
+                 boost -= self.long_penalty
+
+             # Combine with original score
+             final_score = candidate.similarity_score * (1.0 + boost)
+
+             scored.append((final_score, i, candidate))
+
+         # Sort by final score (desc), then original index (preserve order on ties)
+         scored.sort(key=lambda x: (-x[0], x[1]))
+
+         # Build reranked results with updated scores
+         reranked = []
+         for final_score, _, candidate in scored[:top_k]:
+             reranked.append(
+                 RecallResult(
+                     memory=candidate.memory,
+                     similarity_score=final_score,
+                     retrieval_time_ms=candidate.retrieval_time_ms,
+                 )
+             )
+
+         return reranked
+
+
+ class CrossEncoderReranker:
+     """Cross-encoder model-based reranking.
+
+     Uses a sentence-transformers cross-encoder to score (query, candidate) pairs.
+     Model scores relevance directly, producing better ranking than retrieval alone.
+
+     Requires sentence-transformers package.
+     """
+
+     def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
+         """Initialize cross-encoder reranker.
+
+         Args:
+             model_name: HuggingFace model name
+
+         Raises:
+             ImportError: If sentence-transformers not installed
+         """
+         if not CROSS_ENCODER_AVAILABLE:
+             raise ImportError(
+                 "sentence-transformers required for CrossEncoderReranker. "
+                 "Install with: pip install sentence-transformers"
+             )
+
+         logger.info(f"Loading cross-encoder model: {model_name}")
+         self.model = CrossEncoder(model_name)
+
+     def rerank(
+         self, query: str, candidates: list[RecallResult], top_k: int
+     ) -> list[RecallResult]:
+         """Rerank using cross-encoder model."""
+         if not candidates:
+             logger.debug("No candidates to rerank")
+             return []
+
+         # Build (query, content) pairs
+         pairs = [(query, candidate.memory.content) for candidate in candidates]
+
+         # Score with model
+         scores = self.model.predict(pairs)
+
+         # Sort by score descending
+         scored = list(zip(scores, candidates))
+         scored.sort(key=lambda x: -x[0])
+
+         # Build reranked results with updated scores
+         reranked = []
+         for score, candidate in scored[:top_k]:
+             reranked.append(
+                 RecallResult(
+                     memory=candidate.memory,
+                     similarity_score=float(score),
+                     retrieval_time_ms=candidate.retrieval_time_ms,
+                 )
+             )
+
+         return reranked
+
+
+ def create_reranker(config: "SearchConfig") -> IReranker:
+     """Factory function to create reranker from config.
+
+     Args:
+         config: SearchConfig with reranking settings
+
+     Returns:
+         Configured reranker instance
+
+     Raises:
+         ValueError: If reranking mode is invalid
+         ImportError: If cross-encoder requested but unavailable
+     """
+     mode = getattr(config, "reranking", "heuristic")
+
+     if mode == "none":
+         return NoopReranker()
+
+     elif mode == "heuristic":
+         return HeuristicReranker(
+             recency_decay_days=getattr(config, "recency_decay_days", 30.0),
+             tag_boost_weight=getattr(config, "tag_boost_weight", 0.1),
+         )
+
+     elif mode == "cross-encoder":
+         if not CROSS_ENCODER_AVAILABLE:
+             raise ImportError(
+                 "Cross-encoder reranking requires sentence-transformers. "
+                 "Install with: pip install sentence-transformers"
+             )
+         return CrossEncoderReranker()
+
+     elif mode == "auto":
+         # Try cross-encoder, fall back to heuristic
+         if CROSS_ENCODER_AVAILABLE:
+             try:
+                 return CrossEncoderReranker()
+             except Exception as e:
+                 logger.warning(f"Cross-encoder init failed: {e}. Falling back to heuristic.")
+         return HeuristicReranker(
+             recency_decay_days=getattr(config, "recency_decay_days", 30.0),
+             tag_boost_weight=getattr(config, "tag_boost_weight", 0.1),
+         )
+
+     else:
+         raise ValueError(
+             f"Unknown reranking mode: {mode}. "
+             f"Valid options: 'none', 'heuristic', 'cross-encoder', 'auto'"
+         )
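
To make the heuristic scoring concrete, here is a minimal worked example of the boost arithmetic in HeuristicReranker.rerank. All inputs are made-up values for illustration, not package APIs:

```python
# Worked example of HeuristicReranker's scoring, with made-up inputs.
import math

similarity = 0.80          # score from initial retrieval (vector + BM25)
age_days = 30.0            # memory age in days
recency_decay_days = 30.0  # default decay constant

recency_boost = math.exp(-age_days / recency_decay_days)  # e^-1 ≈ 0.368
tag_boost = 0.1 * 1        # one query term exactly matches a tag
boost = recency_boost + tag_boost                         # ≈ 0.468

final_score = similarity * (1.0 + boost)
print(round(final_score, 2))  # 1.17
```

Note that boosted scores are not bounded by the original similarity scale (here 1.17 from 0.80); only the relative ordering feeds the top_k cut.
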
@@ -0,0 +1,412 @@
+ """Session transcript indexing service.
+
+ Indexes conversation transcripts as chunked embeddings for contextual recall.
+ Supports delta-based ingestion and retention-based cleanup.
+ """
+
+ import logging
+ import uuid
+ from dataclasses import dataclass, field
+ from datetime import datetime, timedelta, timezone
+ from typing import Optional
+
+ from ..interfaces import IEmbeddingService, IVectorStore
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class SessionMessage:
+     """A single message in a conversation transcript.
+
+     Attributes:
+         role: Message role (user, assistant, system)
+         content: Message content
+         timestamp: When the message was sent
+     """
+     role: str
+     content: str
+     timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+
+ @dataclass
+ class SessionChunk:
+     """A chunk of conversation transcript with embedding.
+
+     Attributes:
+         chunk_id: Unique identifier for this chunk
+         session_id: ID of the session this chunk belongs to
+         instance_id: Which agent instance processed this session
+         content: The actual conversation content (multiple messages)
+         embedding: Vector embedding of the content
+         start_time: Timestamp of first message in chunk
+         end_time: Timestamp of last message in chunk
+         chunk_index: Sequential index within session (0, 1, 2...)
+     """
+     chunk_id: str
+     session_id: str
+     instance_id: str
+     content: str
+     embedding: list[float]
+     start_time: datetime
+     end_time: datetime
+     chunk_index: int
+
+
+ class SessionStore:
+     """Service for indexing and searching session transcripts.
+
+     Usage:
+         store = SessionStore(
+             instance_id="clawdio-1",
+             embedding_service=embedding_service,
+             vector_store=vector_store,
+         )
+
+         # Ingest a session transcript
+         messages = [
+             SessionMessage("user", "What is Docker?", datetime.now(timezone.utc)),
+             SessionMessage("assistant", "Docker is a container platform", datetime.now(timezone.utc)),
+         ]
+         await store.ingest("session-123", messages)
+
+         # Search across all sessions
+         results = await store.search("Docker setup error")
+
+         # Search within specific session
+         results = await store.search("Docker", session_id="session-123")
+     """
+
+     # Chunking parameters
+     TARGET_CHUNK_TOKENS = 400  # Target size for each chunk
+     WORDS_PER_TOKEN = 0.75  # Approximate words per token
+     OVERLAP_TOKENS = 50  # Overlap between chunks for context
+
+     def __init__(
+         self,
+         instance_id: str,
+         embedding_service: IEmbeddingService,
+         vector_store: IVectorStore,
+     ):
+         self.instance_id = instance_id
+         self.embedding_service = embedding_service
+         self.vector_store = vector_store
+
+         # Track last ingested index per session for delta ingestion
+         self._session_state: dict[str, int] = {}
+
99
+ self,
100
+ session_id: str,
101
+ messages: list[SessionMessage],
102
+ instance_id: Optional[str] = None,
103
+ ) -> dict:
104
+ """Ingest session messages with delta-based processing.
105
+
106
+ Only processes new messages since last ingestion for this session.
107
+
108
+ Args:
109
+ session_id: Unique identifier for the session
110
+ messages: List of conversation messages
111
+ instance_id: Override instance ID (defaults to self.instance_id)
112
+
113
+ Returns:
114
+ Dict with keys: success, chunks_created, messages_processed
115
+ """
116
+ if not messages:
117
+ return {
118
+ "success": True,
119
+ "chunks_created": 0,
120
+ "messages_processed": 0,
121
+ }
122
+
123
+ # Delta ingestion: only process new messages
124
+ last_index = self._session_state.get(session_id, 0)
125
+ new_messages = messages[last_index:]
126
+
127
+ if not new_messages:
128
+ return {
129
+ "success": True,
130
+ "chunks_created": 0,
131
+ "messages_processed": 0,
132
+ }
133
+
134
+ try:
135
+ # Create chunks from new messages
136
+ chunks = await self._chunk_messages(
137
+ new_messages,
138
+ session_id,
139
+ instance_id or self.instance_id,
140
+ )
141
+
142
+ # Store chunks in vector store
143
+ for chunk in chunks:
144
+ await self._store_chunk(chunk)
145
+
146
+ # Update state
147
+ self._session_state[session_id] = len(messages)
148
+
149
+ return {
150
+ "success": True,
151
+ "chunks_created": len(chunks),
152
+ "messages_processed": len(new_messages),
153
+ }
154
+
155
+ except Exception as e:
156
+ logger.exception(f"Failed to ingest session {session_id}: {e}")
157
+ return {
158
+ "success": False,
159
+ "error": str(e),
160
+ }
161
+
+     async def search(
+         self,
+         query: str,
+         session_id: Optional[str] = None,
+         limit: int = 5,
+         min_relevance: float = 0.0,
+     ) -> list[dict]:
+         """Search session transcripts by semantic similarity.
+
+         Args:
+             query: Natural language search query
+             session_id: Optional filter to specific session
+             limit: Maximum number of results to return
+             min_relevance: Minimum similarity score (0.0 to 1.0)
+
+         Returns:
+             List of dicts with keys: chunk_id, session_id, instance_id,
+             content, similarity_score, start_time, end_time, chunk_index
+         """
+         try:
+             # Generate query embedding
+             query_embedding = await self.embedding_service.embed(query)
+
+             # Search chunks
+             results = await self._search_chunks(
+                 query_embedding,
+                 session_id,
+                 limit,
+                 min_relevance,
+             )
+
+             return results
+
+         except Exception as e:
+             logger.exception(f"Failed to search sessions: {e}")
+             return []
+
+     async def cleanup(self, retention_days: int = 30) -> int:
+         """Delete session chunks older than retention period.
+
+         Args:
+             retention_days: Number of days to retain chunks
+
+         Returns:
+             Number of chunks deleted
+         """
+         try:
+             cutoff_time = datetime.now(timezone.utc) - timedelta(days=retention_days)
+
+             # Find and delete expired chunks
+             deleted = await self._delete_chunks_before(cutoff_time)
+
+             return deleted
+
+         except Exception as e:
+             logger.exception(f"Failed to cleanup sessions: {e}")
+             return 0
+
+ async def get_stats(self) -> dict:
221
+ """Get statistics about indexed sessions.
222
+
223
+ Returns:
224
+ Dict with keys: total_chunks, total_sessions,
225
+ earliest_chunk, latest_chunk
226
+ """
227
+ try:
228
+ chunks = await self._get_all_chunks()
229
+
230
+ if not chunks:
231
+ return {
232
+ "total_chunks": 0,
233
+ "total_sessions": 0,
234
+ "earliest_chunk": None,
235
+ "latest_chunk": None,
236
+ }
237
+
238
+ session_ids = set()
239
+ timestamps = []
240
+
241
+ for chunk in chunks:
242
+ session_ids.add(chunk["session_id"])
243
+ timestamps.append(chunk["start_time"])
244
+
245
+ return {
246
+ "total_chunks": len(chunks),
247
+ "total_sessions": len(session_ids),
248
+ "earliest_chunk": min(timestamps) if timestamps else None,
249
+ "latest_chunk": max(timestamps) if timestamps else None,
250
+ }
251
+
252
+ except Exception as e:
253
+ logger.exception(f"Failed to get stats: {e}")
254
+ return {
255
+ "total_chunks": 0,
256
+ "total_sessions": 0,
257
+ "earliest_chunk": None,
258
+ "latest_chunk": None,
259
+ }
260
+
261
+ async def _chunk_messages(
262
+ self,
263
+ messages: list[SessionMessage],
264
+ session_id: str,
265
+ instance_id: str,
266
+ ) -> list[SessionChunk]:
267
+ """Chunk messages into ~400 token windows with overlap.
268
+
269
+ Uses a simple word-count approximation: words / 0.75 ≈ tokens.
270
+ """
271
+ chunks = []
272
+ chunk_index = 0
273
+
274
+ # Convert messages to text with timestamps
275
+ message_texts = []
276
+ for msg in messages:
277
+ text = f"{msg.role}: {msg.content}"
278
+ message_texts.append((text, msg.timestamp))
279
+
280
+ # Estimate tokens
281
+ target_words = int(self.TARGET_CHUNK_TOKENS * self.WORDS_PER_TOKEN)
282
+ overlap_words = int(self.OVERLAP_TOKENS * self.WORDS_PER_TOKEN)
283
+
284
+ i = 0
285
+ while i < len(message_texts):
286
+ chunk_messages = []
287
+ chunk_word_count = 0
288
+ start_time = message_texts[i][1]
289
+ end_time = start_time
290
+
291
+ # Collect messages until we reach target size
292
+ while i < len(message_texts) and chunk_word_count < target_words:
293
+ text, timestamp = message_texts[i]
294
+ words = len(text.split())
295
+ chunk_messages.append(text)
296
+ chunk_word_count += words
297
+ end_time = timestamp
298
+ i += 1
299
+
300
+ # Create chunk
301
+ if chunk_messages:
302
+ content = "\n".join(chunk_messages)
303
+ embedding = await self.embedding_service.embed(content)
304
+
305
+ chunk = SessionChunk(
306
+ chunk_id=str(uuid.uuid4()),
307
+ session_id=session_id,
308
+ instance_id=instance_id,
309
+ content=content,
310
+ embedding=embedding,
311
+ start_time=start_time,
312
+ end_time=end_time,
313
+ chunk_index=chunk_index,
314
+ )
315
+ chunks.append(chunk)
316
+ chunk_index += 1
317
+
318
+ # Backtrack for overlap
319
+ if i < len(message_texts):
320
+ # Calculate how many messages to backtrack
321
+ overlap_word_target = 0
322
+ backtrack = 0
323
+ while (backtrack < len(chunk_messages) and
324
+ overlap_word_target < overlap_words):
325
+ backtrack += 1
326
+ overlap_word_target += len(chunk_messages[-backtrack].split())
327
+
328
+ i -= min(backtrack, 2) # Backtrack at most 2 messages
329
+ i = max(i, 0)
330
+
331
+ return chunks
332
+
+     async def _store_chunk(self, chunk: SessionChunk) -> None:
+         """Store a session chunk in memory.
+
+         Note: Currently uses in-memory list storage. This is intentional for v0.2.0
+         to keep the initial implementation simple and testable. Data does not persist
+         across restarts. A future version will integrate with LanceDB for persistent
+         storage in a separate 'session_chunks' table. See issue #38 follow-up.
+         """
+         if not hasattr(self, '_chunks'):
+             self._chunks = []
+
+         self._chunks.append({
+             "chunk_id": chunk.chunk_id,
+             "session_id": chunk.session_id,
+             "instance_id": chunk.instance_id,
+             "content": chunk.content,
+             "embedding": chunk.embedding,
+             "start_time": chunk.start_time,
+             "end_time": chunk.end_time,
+             "chunk_index": chunk.chunk_index,
+         })
+
+     async def _search_chunks(
+         self,
+         query_embedding: list[float],
+         session_id: Optional[str],
+         limit: int,
+         min_relevance: float,
+     ) -> list[dict]:
+         """Search for chunks by similarity."""
+         if not hasattr(self, '_chunks'):
+             return []
+
+         # Calculate similarities
+         results = []
+         for chunk in self._chunks:
+             # Filter by session_id if provided
+             if session_id and chunk["session_id"] != session_id:
+                 continue
+
+             similarity = self.embedding_service.similarity(
+                 query_embedding,
+                 chunk["embedding"],
+             )
+
+             if similarity >= min_relevance:
+                 results.append({
+                     "chunk_id": chunk["chunk_id"],
+                     "session_id": chunk["session_id"],
+                     "instance_id": chunk["instance_id"],
+                     "content": chunk["content"],
+                     "similarity_score": similarity,
+                     "start_time": chunk["start_time"],
+                     "end_time": chunk["end_time"],
+                     "chunk_index": chunk["chunk_index"],
+                 })
+
+         # Sort by similarity
+         results.sort(key=lambda x: x["similarity_score"], reverse=True)
+
+         return results[:limit]
+
+     async def _delete_chunks_before(self, cutoff_time: datetime) -> int:
+         """Delete chunks older than cutoff time."""
+         if not hasattr(self, '_chunks'):
+             return 0
+
+         initial_count = len(self._chunks)
+         self._chunks = [
+             chunk for chunk in self._chunks
+             if chunk["end_time"] >= cutoff_time
+         ]
+
+         return initial_count - len(self._chunks)
+
+     async def _get_all_chunks(self) -> list[dict]:
+         """Get all stored chunks."""
+         if not hasattr(self, '_chunks'):
+             return []
+         return self._chunks
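
To make the chunking arithmetic and delta-ingestion behavior concrete, here is a hedged, self-contained sketch against the definitions above. `FakeEmbedder` is a hypothetical stand-in that satisfies only the `embed()`/`similarity()` calls the in-memory paths make; it is not part of the package:

```python
# Minimal sketch of SessionStore's delta ingestion, assuming the
# SessionMessage/SessionStore definitions above are importable.
# Chunking budget with the class defaults: int(400 * 0.75) = 300 words
# per chunk, with int(50 * 0.75) = 37 words of overlap.
import asyncio


class FakeEmbedder:
    async def embed(self, text: str) -> list[float]:
        # Toy 2-d embedding, just enough for the demo
        return [float(len(text)), float(text.count(" ") + 1)]

    def similarity(self, a: list[float], b: list[float]) -> float:
        # Cosine similarity
        dot = sum(x * y for x, y in zip(a, b))
        norm = (sum(x * x for x in a) ** 0.5) * (sum(x * x for x in b) ** 0.5)
        return dot / norm if norm else 0.0


async def demo() -> None:
    # vector_store is unused by the current in-memory storage paths
    store = SessionStore("demo-1", FakeEmbedder(), vector_store=None)
    msgs = [
        SessionMessage("user", "What is Docker?"),
        SessionMessage("assistant", "Docker is a container platform."),
    ]
    first = await store.ingest("session-123", msgs)   # processes both messages
    second = await store.ingest("session-123", msgs)  # delta: nothing new to do
    print(first["messages_processed"], second["messages_processed"])  # 2 0

    hits = await store.search("Docker")
    print(hits[0]["session_id"] if hits else None)    # session-123


asyncio.run(demo())
```

The second `ingest` call returns immediately because `_session_state` already records two processed messages for `session-123`; callers can therefore re-send the full transcript on every turn without re-embedding old content.
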