tribalmemory 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tribalmemory/cli.py +147 -4
- tribalmemory/interfaces.py +44 -0
- tribalmemory/mcp/server.py +160 -14
- tribalmemory/server/app.py +53 -2
- tribalmemory/server/config.py +41 -0
- tribalmemory/server/models.py +65 -0
- tribalmemory/server/routes.py +68 -0
- tribalmemory/services/fts_store.py +255 -0
- tribalmemory/services/memory.py +193 -33
- tribalmemory/services/reranker.py +267 -0
- tribalmemory/services/session_store.py +412 -0
- tribalmemory/services/vector_store.py +86 -1
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/METADATA +1 -1
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/RECORD +18 -15
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/WHEEL +0 -0
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/entry_points.txt +0 -0
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {tribalmemory-0.1.1.dist-info → tribalmemory-0.2.0.dist-info}/top_level.txt +0 -0
tribalmemory/services/memory.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
"""Tribal Memory Service - Main API for agents."""
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
5
7
|
from typing import Optional
|
|
6
8
|
import uuid
|
|
7
9
|
|
|
@@ -15,6 +17,10 @@ from ..interfaces import (
|
|
|
15
17
|
StoreResult,
|
|
16
18
|
)
|
|
17
19
|
from .deduplication import SemanticDeduplicationService
|
|
20
|
+
from .fts_store import FTSStore, hybrid_merge
|
|
21
|
+
from .reranker import IReranker, NoopReranker, create_reranker
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
18
24
|
|
|
19
25
|
|
|
20
26
|
class TribalMemoryService(IMemoryService):
|
|
@@ -39,11 +45,25 @@ class TribalMemoryService(IMemoryService):
|
|
|
39
45
|
dedup_exact_threshold: float = 0.98,
|
|
40
46
|
dedup_near_threshold: float = 0.90,
|
|
41
47
|
auto_reject_duplicates: bool = True,
|
|
48
|
+
fts_store: Optional[FTSStore] = None,
|
|
49
|
+
hybrid_search: bool = True,
|
|
50
|
+
hybrid_vector_weight: float = 0.7,
|
|
51
|
+
hybrid_text_weight: float = 0.3,
|
|
52
|
+
hybrid_candidate_multiplier: int = 4,
|
|
53
|
+
reranker: Optional[IReranker] = None,
|
|
54
|
+
rerank_pool_multiplier: int = 2,
|
|
42
55
|
):
|
|
43
56
|
self.instance_id = instance_id
|
|
44
57
|
self.embedding_service = embedding_service
|
|
45
58
|
self.vector_store = vector_store
|
|
46
59
|
self.auto_reject_duplicates = auto_reject_duplicates
|
|
60
|
+
self.fts_store = fts_store
|
|
61
|
+
self.hybrid_search = hybrid_search and fts_store is not None
|
|
62
|
+
self.hybrid_vector_weight = hybrid_vector_weight
|
|
63
|
+
self.hybrid_text_weight = hybrid_text_weight
|
|
64
|
+
self.hybrid_candidate_multiplier = hybrid_candidate_multiplier
|
|
65
|
+
self.reranker = reranker or NoopReranker()
|
|
66
|
+
self.rerank_pool_multiplier = rerank_pool_multiplier
|
|
47
67
|
|
|
48
68
|
self.dedup_service = SemanticDeduplicationService(
|
|
49
69
|
vector_store=vector_store,
|
|
@@ -89,7 +109,16 @@ class TribalMemoryService(IMemoryService):
|
|
|
89
109
|
confidence=1.0,
|
|
90
110
|
)
|
|
91
111
|
|
|
92
|
-
|
|
112
|
+
result = await self.vector_store.store(entry)
|
|
113
|
+
|
|
114
|
+
# Index in FTS for hybrid search (best-effort; vector store is primary)
|
|
115
|
+
if result.success and self.fts_store:
|
|
116
|
+
try:
|
|
117
|
+
self.fts_store.index(entry.id, content, tags or [])
|
|
118
|
+
except Exception as e:
|
|
119
|
+
logger.warning("FTS indexing failed for %s: %s", entry.id, e)
|
|
120
|
+
|
|
121
|
+
return result
|
|
93
122
|
|
|
94
123
|
async def recall(
|
|
95
124
|
self,
|
|
@@ -98,7 +127,11 @@ class TribalMemoryService(IMemoryService):
|
|
|
98
127
|
min_relevance: float = 0.7,
|
|
99
128
|
tags: Optional[list[str]] = None,
|
|
100
129
|
) -> list[RecallResult]:
|
|
101
|
-
"""Recall relevant memories.
|
|
130
|
+
"""Recall relevant memories using hybrid search.
|
|
131
|
+
|
|
132
|
+
When hybrid search is enabled (FTS store available), combines
|
|
133
|
+
vector similarity with BM25 keyword matching for better results.
|
|
134
|
+
Falls back to vector-only search when FTS is unavailable.
|
|
102
135
|
|
|
103
136
|
Args:
|
|
104
137
|
query: Natural language query
|
|
@@ -112,7 +145,13 @@ class TribalMemoryService(IMemoryService):
|
|
|
112
145
|
return []
|
|
113
146
|
|
|
114
147
|
filters = {"tags": tags} if tags else None
|
|
148
|
+
|
|
149
|
+
if self.hybrid_search and self.fts_store:
|
|
150
|
+
return await self._hybrid_recall(
|
|
151
|
+
query, query_embedding, limit, min_relevance, filters
|
|
152
|
+
)
|
|
115
153
|
|
|
154
|
+
# Vector-only fallback
|
|
116
155
|
results = await self.vector_store.recall(
|
|
117
156
|
query_embedding,
|
|
118
157
|
limit=limit,
|
|
@@ -121,6 +160,90 @@ class TribalMemoryService(IMemoryService):
|
|
|
121
160
|
)
|
|
122
161
|
|
|
123
162
|
return self._filter_superseded(results)
|
|
163
|
+
|
|
164
|
+
async def _hybrid_recall(
    self,
    query: str,
    query_embedding: list[float],
    limit: int,
    min_relevance: float,
    filters: Optional[dict],
) -> list[RecallResult]:
    """Hybrid recall: vector + BM25 candidates merged, then reranked.

    Pipeline:
      1. Vector search over a widened candidate pool (relaxed threshold).
      2. BM25 keyword search via the FTS store.
      3. Weighted merge of the two score lists (``hybrid_merge``).
      4. Fetch full entries for BM25-only hits (best-effort).
      5. Rerank the pooled candidates and drop superseded memories.

    Args:
        query: Natural language query.
        query_embedding: Embedding vector for ``query``.
        limit: Maximum number of results to return.
        min_relevance: Caller's relevance floor; candidates are gathered
            at half this threshold so the merge step has material to
            work with, and BM25-only hits are re-checked against it.
        filters: Optional vector-store filters (e.g. ``{"tags": [...]}``).

    Returns:
        Up to ``limit`` reranked results with superseded memories removed.
    """
    import asyncio  # local import: only needed on the hybrid path

    candidate_limit = limit * self.hybrid_candidate_multiplier

    # 1. Vector search — wide candidate pool with a relaxed threshold.
    vector_results = await self.vector_store.recall(
        query_embedding,
        limit=candidate_limit,
        min_similarity=min_relevance * 0.5,  # lower bar for candidates
        filters=filters,
    )

    # 2. BM25 keyword search over the same pool size.
    bm25_results = self.fts_store.search(query, limit=candidate_limit)

    # 3./4. Weighted merge of vector and BM25 scores by memory id.
    vector_for_merge = [
        {"id": r.memory.id, "score": r.similarity_score}
        for r in vector_results
    ]
    merged = hybrid_merge(
        vector_for_merge,
        bm25_results,
        self.hybrid_vector_weight,
        self.hybrid_text_weight,
    )

    # Full RecallResult objects are already cached for vector hits;
    # BM25-only hits must be fetched from the vector store by id.
    entry_map = {r.memory.id: r for r in vector_results}
    rerank_pool_size = min(limit * self.rerank_pool_multiplier, len(merged))

    cached_hits: list[tuple[dict, RecallResult]] = []
    bm25_only: list[dict] = []
    for m in merged[:rerank_pool_size]:
        if m["id"] in entry_map:
            cached_hits.append((m, entry_map[m["id"]]))
        else:
            bm25_only.append(m)

    # Batch-fetch BM25-only hits concurrently. Fetches are best-effort:
    # a single failing get() degrades the result set instead of failing
    # the whole recall (consistent with the best-effort FTS handling in
    # store()/forget()). Previously a lone exception from gather()
    # aborted the entire hybrid recall.
    fetched_entries: list = []
    if bm25_only:
        fetched = await asyncio.gather(
            *(self.vector_store.get(m["id"]) for m in bm25_only),
            return_exceptions=True,
        )
        for m, res in zip(bm25_only, fetched):
            if isinstance(res, BaseException):
                logger.warning(
                    "Fetch failed for BM25 hit %s: %s", m["id"], res
                )
                fetched_entries.append(None)
            else:
                fetched_entries.append(res)

    # Assemble rerank candidates carrying the merged ("final") scores.
    candidates: list[RecallResult] = []
    for m, recall_result in cached_hits:
        candidates.append(RecallResult(
            memory=recall_result.memory,
            similarity_score=m["final_score"],
            retrieval_time_ms=recall_result.retrieval_time_ms,
        ))
    for m, entry in zip(bm25_only, fetched_entries):
        # Apply the same relaxed floor used for vector candidates.
        if entry and m["final_score"] >= min_relevance * 0.5:
            candidates.append(RecallResult(
                memory=entry,
                similarity_score=m["final_score"],
                retrieval_time_ms=0,
            ))

    # 5. Rerank and drop memories superseded by corrections in the set.
    reranked = self.reranker.rerank(query, candidates, top_k=limit)
    return self._filter_superseded(reranked)
|
|
124
247
|
|
|
125
248
|
async def correct(
|
|
126
249
|
self,
|
|
@@ -157,7 +280,13 @@ class TribalMemoryService(IMemoryService):
|
|
|
157
280
|
|
|
158
281
|
async def forget(self, memory_id: str) -> bool:
    """Forget (soft delete) a memory.

    The vector store is authoritative; removal from the FTS index is
    best-effort, so a failure there is logged rather than raised.

    Args:
        memory_id: Identifier of the memory to delete.

    Returns:
        Whatever the vector store reports for the delete.
    """
    deleted = await self.vector_store.delete(memory_id)
    if not (deleted and self.fts_store):
        return deleted
    try:
        self.fts_store.delete(memory_id)
    except Exception as exc:  # FTS is a secondary index — never fatal
        logger.warning("FTS cleanup failed for %s: %s", memory_id, exc)
    return deleted
|
|
161
290
|
|
|
162
291
|
async def get(self, memory_id: str) -> Optional[MemoryEntry]:
|
|
163
292
|
"""Get a memory by ID with full provenance."""
|
|
@@ -165,40 +294,17 @@ class TribalMemoryService(IMemoryService):
|
|
|
165
294
|
|
|
166
295
|
async def get_stats(self) -> dict:
    """Return aggregate memory statistics.

    All aggregation is delegated to ``vector_store.get_stats()``, which
    computes the counts efficiently (paginated by default, with native
    queries for SQL-backed stores).
    """
    stats = await self.vector_store.get_stats()
    return stats
|
|
197
303
|
|
|
198
304
|
@staticmethod
|
|
199
305
|
def _filter_superseded(results: list[RecallResult]) -> list[RecallResult]:
|
|
200
306
|
"""Remove memories that are superseded by corrections in the result set."""
|
|
201
|
-
superseded_ids = {
|
|
307
|
+
superseded_ids: set[str] = {
|
|
202
308
|
r.memory.supersedes for r in results if r.memory.supersedes
|
|
203
309
|
}
|
|
204
310
|
if not superseded_ids:
|
|
@@ -213,6 +319,14 @@ def create_memory_service(
|
|
|
213
319
|
api_base: Optional[str] = None,
|
|
214
320
|
embedding_model: Optional[str] = None,
|
|
215
321
|
embedding_dimensions: Optional[int] = None,
|
|
322
|
+
hybrid_search: bool = True,
|
|
323
|
+
hybrid_vector_weight: float = 0.7,
|
|
324
|
+
hybrid_text_weight: float = 0.3,
|
|
325
|
+
hybrid_candidate_multiplier: int = 4,
|
|
326
|
+
reranking: str = "heuristic",
|
|
327
|
+
recency_decay_days: float = 30.0,
|
|
328
|
+
tag_boost_weight: float = 0.1,
|
|
329
|
+
rerank_pool_multiplier: int = 2,
|
|
216
330
|
) -> TribalMemoryService:
|
|
217
331
|
"""Factory function to create a memory service with sensible defaults.
|
|
218
332
|
|
|
@@ -225,6 +339,18 @@ def create_memory_service(
|
|
|
225
339
|
For Ollama: "http://localhost:11434/v1"
|
|
226
340
|
embedding_model: Embedding model name. Default: "text-embedding-3-small".
|
|
227
341
|
embedding_dimensions: Embedding output dimensions. Default: 1536.
|
|
342
|
+
hybrid_search: Enable BM25 hybrid search (default: True).
|
|
343
|
+
hybrid_vector_weight: Weight for vector similarity (default: 0.7).
|
|
344
|
+
hybrid_text_weight: Weight for BM25 text score (default: 0.3).
|
|
345
|
+
hybrid_candidate_multiplier: Multiplier for candidate pool size
|
|
346
|
+
(default: 4). Retrieves 4× limit from each source before
|
|
347
|
+
merging.
|
|
348
|
+
reranking: Reranking mode: "auto", "cross-encoder", "heuristic", "none"
|
|
349
|
+
(default: "heuristic").
|
|
350
|
+
recency_decay_days: Half-life for recency boost (default: 30.0).
|
|
351
|
+
tag_boost_weight: Weight for tag match boost (default: 0.1).
|
|
352
|
+
rerank_pool_multiplier: How many candidates to give the reranker
|
|
353
|
+
(N × limit). Default: 2.
|
|
228
354
|
|
|
229
355
|
Returns:
|
|
230
356
|
Configured TribalMemoryService ready for use.
|
|
@@ -267,9 +393,43 @@ def create_memory_service(
|
|
|
267
393
|
vector_store = InMemoryVectorStore(embedding_service)
|
|
268
394
|
else:
|
|
269
395
|
vector_store = InMemoryVectorStore(embedding_service)
|
|
396
|
+
|
|
397
|
+
# Create FTS store for hybrid search (co-located with LanceDB)
|
|
398
|
+
fts_store = None
|
|
399
|
+
if hybrid_search and db_path:
|
|
400
|
+
try:
|
|
401
|
+
fts_db_path = str(Path(db_path) / "fts_index.db")
|
|
402
|
+
fts_store = FTSStore(fts_db_path)
|
|
403
|
+
if fts_store.is_available():
|
|
404
|
+
logger.info("Hybrid search enabled (SQLite FTS5)")
|
|
405
|
+
else:
|
|
406
|
+
logger.warning(
|
|
407
|
+
"FTS5 not available in SQLite build. "
|
|
408
|
+
"Hybrid search disabled, using vector-only."
|
|
409
|
+
)
|
|
410
|
+
fts_store = None
|
|
411
|
+
except Exception as e:
|
|
412
|
+
logger.warning(f"FTS store init failed: {e}. Using vector-only.")
|
|
413
|
+
fts_store = None
|
|
414
|
+
|
|
415
|
+
# Create reranker
|
|
416
|
+
from ..server.config import SearchConfig
|
|
417
|
+
search_config = SearchConfig(
|
|
418
|
+
reranking=reranking,
|
|
419
|
+
recency_decay_days=recency_decay_days,
|
|
420
|
+
tag_boost_weight=tag_boost_weight,
|
|
421
|
+
)
|
|
422
|
+
reranker = create_reranker(search_config)
|
|
270
423
|
|
|
271
424
|
return TribalMemoryService(
|
|
272
425
|
instance_id=instance_id,
|
|
273
426
|
embedding_service=embedding_service,
|
|
274
|
-
vector_store=vector_store
|
|
427
|
+
vector_store=vector_store,
|
|
428
|
+
fts_store=fts_store,
|
|
429
|
+
hybrid_search=hybrid_search,
|
|
430
|
+
hybrid_vector_weight=hybrid_vector_weight,
|
|
431
|
+
hybrid_text_weight=hybrid_text_weight,
|
|
432
|
+
hybrid_candidate_multiplier=hybrid_candidate_multiplier,
|
|
433
|
+
reranker=reranker,
|
|
434
|
+
rerank_pool_multiplier=rerank_pool_multiplier,
|
|
275
435
|
)
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""Result reranking for improved retrieval quality.
|
|
2
|
+
|
|
3
|
+
Provides multiple reranking strategies:
|
|
4
|
+
- NoopReranker: Pass-through, no reranking
|
|
5
|
+
- HeuristicReranker: Fast heuristic scoring (recency, tags, length)
|
|
6
|
+
- CrossEncoderReranker: Model-based reranking (sentence-transformers)
|
|
7
|
+
|
|
8
|
+
Reranking happens after initial retrieval (vector + BM25) to refine ordering.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
import math
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import TYPE_CHECKING, Protocol
|
|
15
|
+
|
|
16
|
+
from ..interfaces import RecallResult
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from ..server.config import SearchConfig
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
# Lazy import for optional dependency
|
|
24
|
+
CROSS_ENCODER_AVAILABLE = False
|
|
25
|
+
CrossEncoder = None
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from sentence_transformers import CrossEncoder as _CrossEncoder
|
|
29
|
+
CrossEncoder = _CrossEncoder
|
|
30
|
+
CROSS_ENCODER_AVAILABLE = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class IReranker(Protocol):
    """Structural (duck-typed) interface for result reranking.

    Any object exposing a matching ``rerank`` method satisfies this
    Protocol — no inheritance is required.
    """

    def rerank(
        self, query: str, candidates: list[RecallResult], top_k: int
    ) -> list[RecallResult]:
        """Reorder candidates by relevance to *query* and truncate.

        Args:
            query: Original search query
            candidates: Initial retrieval results to reorder
            top_k: Maximum number of results to return

        Returns:
            Reranked results (up to top_k)
        """
        ...
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class NoopReranker:
|
|
55
|
+
"""Pass-through reranker (no reranking)."""
|
|
56
|
+
|
|
57
|
+
def rerank(
|
|
58
|
+
self, query: str, candidates: list[RecallResult], top_k: int
|
|
59
|
+
) -> list[RecallResult]:
|
|
60
|
+
"""Return top_k candidates unchanged."""
|
|
61
|
+
return candidates[:top_k]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class HeuristicReranker:
|
|
65
|
+
"""Heuristic reranking with recency, tag match, and length signals.
|
|
66
|
+
|
|
67
|
+
Combines multiple quality signals:
|
|
68
|
+
- Recency: newer memories score higher (exponential decay)
|
|
69
|
+
- Tag match: query terms matching tags boost score
|
|
70
|
+
- Length penalty: very short or very long content penalized
|
|
71
|
+
|
|
72
|
+
Final score: original_score * (1 + boost_sum)
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def __init__(
|
|
76
|
+
self,
|
|
77
|
+
recency_decay_days: float = 30.0,
|
|
78
|
+
tag_boost_weight: float = 0.1,
|
|
79
|
+
min_length: int = 10,
|
|
80
|
+
max_length: int = 2000,
|
|
81
|
+
short_penalty: float = 0.05,
|
|
82
|
+
long_penalty: float = 0.03,
|
|
83
|
+
):
|
|
84
|
+
"""Initialize heuristic reranker.
|
|
85
|
+
|
|
86
|
+
Args:
|
|
87
|
+
recency_decay_days: Half-life for recency boost (days)
|
|
88
|
+
tag_boost_weight: Weight for tag match boost
|
|
89
|
+
min_length: Content shorter than this gets penalty
|
|
90
|
+
max_length: Content longer than this gets penalty
|
|
91
|
+
short_penalty: Penalty for content shorter than min_length
|
|
92
|
+
long_penalty: Penalty for content longer than max_length
|
|
93
|
+
"""
|
|
94
|
+
self.recency_decay_days = recency_decay_days
|
|
95
|
+
self.tag_boost_weight = tag_boost_weight
|
|
96
|
+
self.min_length = min_length
|
|
97
|
+
self.max_length = max_length
|
|
98
|
+
self.short_penalty = short_penalty
|
|
99
|
+
self.long_penalty = long_penalty
|
|
100
|
+
|
|
101
|
+
def rerank(
|
|
102
|
+
self, query: str, candidates: list[RecallResult], top_k: int
|
|
103
|
+
) -> list[RecallResult]:
|
|
104
|
+
"""Rerank using heuristic scoring."""
|
|
105
|
+
if not candidates:
|
|
106
|
+
return []
|
|
107
|
+
|
|
108
|
+
# Compute boost for each candidate
|
|
109
|
+
scored = []
|
|
110
|
+
query_lower = query.lower()
|
|
111
|
+
query_terms = set(query_lower.split())
|
|
112
|
+
now = datetime.utcnow()
|
|
113
|
+
|
|
114
|
+
for i, candidate in enumerate(candidates):
|
|
115
|
+
boost = 0.0
|
|
116
|
+
|
|
117
|
+
# Recency boost (exponential decay)
|
|
118
|
+
# Brand new memory (age=0) gets boost of 1.0, older memories decay exponentially
|
|
119
|
+
age_days = (now - candidate.memory.created_at).total_seconds() / 86400
|
|
120
|
+
recency_boost = math.exp(-age_days / self.recency_decay_days)
|
|
121
|
+
boost += recency_boost
|
|
122
|
+
|
|
123
|
+
# Tag match boost (exact term matching, not substring)
|
|
124
|
+
if candidate.memory.tags:
|
|
125
|
+
tag_lower = set(t.lower() for t in candidate.memory.tags)
|
|
126
|
+
# Count query terms that exactly match tags
|
|
127
|
+
matches = sum(1 for term in query_terms if term in tag_lower)
|
|
128
|
+
if matches > 0:
|
|
129
|
+
boost += self.tag_boost_weight * matches
|
|
130
|
+
|
|
131
|
+
# Length penalty
|
|
132
|
+
content_length = len(candidate.memory.content)
|
|
133
|
+
if content_length < self.min_length:
|
|
134
|
+
boost -= self.short_penalty
|
|
135
|
+
elif content_length > self.max_length:
|
|
136
|
+
boost -= self.long_penalty
|
|
137
|
+
|
|
138
|
+
# Combine with original score
|
|
139
|
+
final_score = candidate.similarity_score * (1.0 + boost)
|
|
140
|
+
|
|
141
|
+
scored.append((final_score, i, candidate))
|
|
142
|
+
|
|
143
|
+
# Sort by final score (desc), then original index (preserve order on ties)
|
|
144
|
+
scored.sort(key=lambda x: (-x[0], x[1]))
|
|
145
|
+
|
|
146
|
+
# Build reranked results with updated scores
|
|
147
|
+
reranked = []
|
|
148
|
+
for final_score, _, candidate in scored[:top_k]:
|
|
149
|
+
reranked.append(
|
|
150
|
+
RecallResult(
|
|
151
|
+
memory=candidate.memory,
|
|
152
|
+
similarity_score=final_score,
|
|
153
|
+
retrieval_time_ms=candidate.retrieval_time_ms,
|
|
154
|
+
)
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
return reranked
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class CrossEncoderReranker:
    """Model-based reranking via a sentence-transformers cross-encoder.

    Scores (query, candidate) pairs directly with the model, which
    typically produces a better ordering than retrieval scores alone.

    Requires sentence-transformers package.
    """

    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
        """Load the cross-encoder model.

        Args:
            model_name: HuggingFace model name

        Raises:
            ImportError: If sentence-transformers not installed
        """
        if not CROSS_ENCODER_AVAILABLE:
            raise ImportError(
                "sentence-transformers required for CrossEncoderReranker. "
                "Install with: pip install sentence-transformers"
            )

        logger.info(f"Loading cross-encoder model: {model_name}")
        self.model = CrossEncoder(model_name)

    def rerank(
        self, query: str, candidates: list[RecallResult], top_k: int
    ) -> list[RecallResult]:
        """Score and reorder candidates with the cross-encoder model."""
        if not candidates:
            logger.debug("No candidates to rerank")
            return []

        # Model scores each (query, content) pair directly.
        pair_scores = self.model.predict(
            [(query, cand.memory.content) for cand in candidates]
        )

        # Highest model score first; sorted() is stable, so ties keep
        # their original retrieval order.
        ranked = sorted(
            zip(pair_scores, candidates),
            key=lambda pair: pair[0],
            reverse=True,
        )

        # Carry the model scores into fresh result objects.
        return [
            RecallResult(
                memory=cand.memory,
                similarity_score=float(score),
                retrieval_time_ms=cand.retrieval_time_ms,
            )
            for score, cand in ranked[:top_k]
        ]
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def create_reranker(config: "SearchConfig") -> IReranker:
    """Factory function to create reranker from config.

    Args:
        config: SearchConfig with reranking settings

    Returns:
        Configured reranker instance

    Raises:
        ValueError: If reranking mode is invalid
        ImportError: If cross-encoder requested but unavailable
    """
    mode = getattr(config, "reranking", "heuristic")

    def _build_heuristic() -> HeuristicReranker:
        # Shared construction for "heuristic" mode and the "auto" fallback.
        return HeuristicReranker(
            recency_decay_days=getattr(config, "recency_decay_days", 30.0),
            tag_boost_weight=getattr(config, "tag_boost_weight", 0.1),
        )

    if mode == "none":
        return NoopReranker()

    if mode == "heuristic":
        return _build_heuristic()

    if mode == "cross-encoder":
        if not CROSS_ENCODER_AVAILABLE:
            raise ImportError(
                "Cross-encoder reranking requires sentence-transformers. "
                "Install with: pip install sentence-transformers"
            )
        return CrossEncoderReranker()

    if mode == "auto":
        # Prefer the cross-encoder when installed; fall back to heuristic
        # on any initialization failure.
        if CROSS_ENCODER_AVAILABLE:
            try:
                return CrossEncoderReranker()
            except Exception as e:
                logger.warning(f"Cross-encoder init failed: {e}. Falling back to heuristic.")
        return _build_heuristic()

    raise ValueError(
        f"Unknown reranking mode: {mode}. "
        f"Valid options: 'none', 'heuristic', 'cross-encoder', 'auto'"
    )
|