warm_memory-0.2.1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- warm_memory/__init__.py +14 -0
- warm_memory/benchmark.py +219 -0
- warm_memory/buffer.py +171 -0
- warm_memory/decorators.py +70 -0
- warm_memory/langgraph/__init__.py +17 -0
- warm_memory/langgraph/agent.py +137 -0
- warm_memory/langgraph/benchmark.py +325 -0
- warm_memory/langgraph/embeddings.py +94 -0
- warm_memory/langgraph/store.py +335 -0
- warm_memory/scoring.py +61 -0
- warm_memory/workload.py +35 -0
- warm_memory-0.2.1.dist-info/METADATA +306 -0
- warm_memory-0.2.1.dist-info/RECORD +16 -0
- warm_memory-0.2.1.dist-info/WHEEL +5 -0
- warm_memory-0.2.1.dist-info/licenses/LICENSE +21 -0
- warm_memory-0.2.1.dist-info/top_level.txt +1 -0
warm_memory/__init__.py
ADDED
@@ -0,0 +1,14 @@
+from .buffer import WarmMemoryBuffer
+from .benchmark import BenchmarkConfig, BenchmarkResult, run_benchmark
+from .decorators import remember_interaction
+from .scoring import ImportanceScorer, KeywordImportanceScorer
+
+__all__ = [
+    "BenchmarkConfig",
+    "BenchmarkResult",
+    "ImportanceScorer",
+    "KeywordImportanceScorer",
+    "WarmMemoryBuffer",
+    "remember_interaction",
+    "run_benchmark",
+]
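For orientation, here is a minimal quick-start sketch of the surface exported above; the buffer contents and capacity are illustrative, not taken from the package:

    from warm_memory import WarmMemoryBuffer, run_benchmark

    memory = WarmMemoryBuffer(capacity=8)
    memory.add("user", "Which region is the billing service deployed in?")
    memory.add("assistant", "Billing runs in eu-west-1.")

    print(memory.recent(limit=2))                      # sliding-window view
    print(memory.relevant("billing region", limit=2))  # scored, query-aware view

    results = run_benchmark()  # three strategies over the built-in workload
    print(results["fallback"].summary)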
warm_memory/benchmark.py
ADDED
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from time import perf_counter
+from typing import Any
+
+import pandas as pd
+
+from .buffer import WarmMemoryBuffer
+from .workload import ScenarioTurn, default_workload
+
+
+@dataclass(slots=True)
+class BenchmarkConfig:
+    capacity: int = 8
+    top_k: int = 5
+    long_term_limit: int = 8
+    warm_hit_threshold: float = 0.34
+    long_term_latency_ms: float = 8.0
+    llm_base_latency_ms: float = 35.0
+    llm_latency_per_token_ms: float = 0.12
+    prompt_build_per_item_ms: float = 0.08
+
+
+@dataclass(slots=True)
+class BenchmarkResult:
+    name: str
+    turn_log: pd.DataFrame
+    summary: dict[str, float]
+
+
+def _estimate_tokens(items: list[str]) -> int:
+    return sum(max(1, len(item.split())) for item in items)
+
+
+def _summarize_turn_log(name: str, turn_log: pd.DataFrame) -> BenchmarkResult:
+    summary = {
+        "turns": float(len(turn_log)),
+        "warm_hit_rate": float(turn_log["warm_hit"].mean()),
+        "fallback_rate": float(turn_log["fallback_used"].mean()),
+        "avg_prompt_tokens": float(turn_log["prompt_tokens"].mean()),
+        "avg_end_to_end_ms": float(turn_log["end_to_end_ms"].mean()),
+        "p95_end_to_end_ms": float(turn_log["end_to_end_ms"].quantile(0.95)),
+        "answer_accuracy": float(turn_log["answer_correct"].mean()),
+        "memory_precision_at_k": float(turn_log["memory_precision_at_k"].mean()),
+        "repeated_tool_calls": float(turn_log["repeated_tool_call"].sum()),
+    }
+    return BenchmarkResult(name=name, turn_log=turn_log, summary=summary)
+
+
+def _build_prompt_metrics(config: BenchmarkConfig, retrieved_contents: list[str]) -> tuple[int, float]:
+    tokens = _estimate_tokens(retrieved_contents)
+    prompt_build_ms = len(retrieved_contents) * config.prompt_build_per_item_ms
+    llm_ms = config.llm_base_latency_ms + (tokens * config.llm_latency_per_token_ms)
+    return tokens, prompt_build_ms + llm_ms
+
+
+def _markdown_table(frame: pd.DataFrame) -> str:
+    columns = list(frame.columns)
+    header = "| " + " | ".join(columns) + " |"
+    divider = "| " + " | ".join(["---"] * len(columns)) + " |"
+    rows = [
+        "| " + " | ".join(str(row[column]) for column in columns) + " |"
+        for _, row in frame.iterrows()
+    ]
+    return "\n".join([header, divider, *rows])
+
+
+def _write_memory(memory: WarmMemoryBuffer, turn: ScenarioTurn, response: str) -> None:
+    memory.add("user", turn.query, metadata={"scenario_id": turn.turn_id, "topic": turn.topic})
+    memory.add("assistant", response, metadata={"scenario_id": turn.turn_id, "topic": turn.topic})
+
+
+def _retrieve_recent(memory: WarmMemoryBuffer, limit: int) -> pd.DataFrame:
+    recent = memory.recent(limit=limit).copy(deep=True)
+    if not recent.empty:
+        recent["score"] = 1.0
+    return recent
+
+
+def _retrieve_relevant(memory: WarmMemoryBuffer, query: str, limit: int) -> pd.DataFrame:
+    return memory.relevant(query=query, limit=limit)
+
+
+def _run_strategy(
+    name: str,
+    config: BenchmarkConfig,
+    turns: list[ScenarioTurn],
+) -> BenchmarkResult:
+    warm = WarmMemoryBuffer(capacity=config.capacity)
+    long_term = WarmMemoryBuffer(capacity=max(len(turns) * 2, config.capacity * 4))
+    rows: list[dict[str, Any]] = []
+    resolved_topics: set[str] = set()
+
+    for turn in turns:
+        lookup_start = perf_counter()
+        if name == "recency":
+            warm_candidates = _retrieve_recent(warm, config.top_k)
+        else:
+            warm_candidates = _retrieve_relevant(warm, turn.query, config.top_k)
+        warm_lookup_ms = (perf_counter() - lookup_start) * 1000
+
+        best_score = float(warm_candidates["score"].max()) if "score" in warm_candidates.columns and not warm_candidates.empty else 0.0
+        warm_hit = bool(not warm_candidates.empty and (name == "recency" or best_score >= config.warm_hit_threshold))
+
+        fallback_used = False
+        retrieval_ms = warm_lookup_ms
+        retrieved = warm_candidates
+
+        if name == "fallback" and not warm_hit:
+            fallback_used = True
+            retrieval_ms += config.long_term_latency_ms
+            retrieved = long_term.relevant(turn.query, limit=config.long_term_limit)
+
+        retrieved_contents = retrieved["content"].tolist() if not retrieved.empty else []
+        prompt_tokens, generation_ms = _build_prompt_metrics(config, retrieved_contents)
+        end_to_end_ms = retrieval_ms + generation_ms
+
+        relevant_retrieved = {
+            str(meta.get("topic"))
+            for meta in retrieved["metadata"].tolist()
+            if isinstance(meta, dict) and meta.get("topic")
+        }
+        expected = set(turn.required_topics)
+        memory_precision = len(expected & relevant_retrieved) / max(len(relevant_retrieved), 1) if relevant_retrieved else 0.0
+        answer_correct = expected.issubset(relevant_retrieved)
+
+        repeated_tool_call = int(turn.topic in resolved_topics and not answer_correct)
+        if answer_correct:
+            resolved_topics.add(turn.topic)
+
+        response = f"Response for {turn.topic}: {'correct' if answer_correct else 'incomplete'}"
+        _write_memory(warm, turn, response)
+        _write_memory(long_term, turn, response)
+
+        if name == "relevance":
+            warm.retain_relevant(turn.query, limit=config.capacity)
+
+        rows.append(
+            {
+                "turn_id": turn.turn_id,
+                "topic": turn.topic,
+                "query": turn.query,
+                "warm_lookup_ms": warm_lookup_ms,
+                "warm_hit": warm_hit,
+                "fallback_used": fallback_used,
+                "retrieval_ms": retrieval_ms,
+                "prompt_tokens": prompt_tokens,
+                "end_to_end_ms": end_to_end_ms,
+                "answer_correct": answer_correct,
+                "memory_precision_at_k": memory_precision,
+                "repeated_tool_call": repeated_tool_call,
+            }
+        )
+
+    turn_log = pd.DataFrame(rows)
+    return _summarize_turn_log(name, turn_log)
+
+
+def _render_report(config: BenchmarkConfig, results: list[BenchmarkResult]) -> str:
+    summary_frame = pd.DataFrame(
+        [
+            {"strategy": result.name, **result.summary}
+            for result in results
+        ]
+    )
+    best_latency = summary_frame.sort_values("avg_end_to_end_ms").iloc[0]["strategy"]
+    best_accuracy = summary_frame.sort_values("answer_accuracy", ascending=False).iloc[0]["strategy"]
+    best_tokens = summary_frame.sort_values("avg_prompt_tokens").iloc[0]["strategy"]
+
+    lines = [
+        "# WarmMemory Benchmark Report",
+        "",
+        "## Configuration",
+        "",
+        f"- capacity: {config.capacity}",
+        f"- top_k: {config.top_k}",
+        f"- long_term_limit: {config.long_term_limit}",
+        f"- warm_hit_threshold: {config.warm_hit_threshold}",
+        "",
+        "## Summary",
+        "",
+        _markdown_table(summary_frame),
+        "",
+        "## Readout",
+        "",
+        f"- Lowest average latency: `{best_latency}`",
+        f"- Highest answer accuracy: `{best_accuracy}`",
+        f"- Smallest prompt footprint: `{best_tokens}`",
+        "",
+        "## Interpretation",
+        "",
+        "- `recency` shows the baseline cost of always trusting the latest interactions.",
+        "- `relevance` shows the effect of ranking and retaining the hottest working set.",
+        "- `fallback` shows a two-tier memory architecture where long-term retrieval is only used on warm misses.",
+    ]
+    return "\n".join(lines) + "\n"
+
+
+def run_benchmark(
+    *,
+    config: BenchmarkConfig | None = None,
+    report_path: str | Path | None = None,
+) -> dict[str, BenchmarkResult]:
+    active_config = config or BenchmarkConfig()
+    turns = default_workload()
+    results = {
+        name: _run_strategy(name, active_config, turns)
+        for name in ("recency", "relevance", "fallback")
+    }
+
+    if report_path is not None:
+        report_text = _render_report(active_config, list(results.values()))
+        target = Path(report_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(report_text, encoding="utf-8")

+    return results
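The latency model above is purely synthetic: generation-side cost is `llm_base_latency_ms + tokens * llm_latency_per_token_ms`, plus `prompt_build_per_item_ms` per retrieved item, so with the defaults a prompt built from 5 items totaling 100 whitespace-delimited tokens is charged 35 + 100 × 0.12 + 5 × 0.08 = 47.4 ms. A sketch of driving the benchmark, assuming the package is installed (the config overrides and report path are illustrative):

    from warm_memory import BenchmarkConfig, run_benchmark

    # Tighter warm tier and stricter hit threshold than the defaults.
    config = BenchmarkConfig(capacity=4, top_k=3, warm_hit_threshold=0.5)

    results = run_benchmark(config=config, report_path="reports/warm_memory.md")
    for name, result in results.items():
        print(name,
              round(result.summary["warm_hit_rate"], 3),
              round(result.summary["avg_end_to_end_ms"], 1))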
warm_memory/buffer.py
ADDED
@@ -0,0 +1,171 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from itertools import count
+from typing import Any
+
+import pandas as pd
+
+from .scoring import ImportanceScorer, KeywordImportanceScorer
+
+
+@dataclass(slots=True)
+class InteractionRecord:
+    interaction_id: int
+    timestamp: datetime
+    role: str
+    content: str
+    summary: str = ""
+    tags: tuple[str, ...] = field(default_factory=tuple)
+    metadata: dict[str, Any] = field(default_factory=dict)
+
+
+class WarmMemoryBuffer:
+    """
+    Pandas-backed in-memory interaction buffer for agent experiments.
+
+    Two usage modes are supported:
+    - `recent(limit)`: classic sliding-window retrieval.
+    - `relevant(query, limit)`: query-aware retrieval using a pluggable scorer.
+    """
+
+    COLUMNS = ["interaction_id", "timestamp", "role", "content", "summary", "tags", "metadata"]
+
+    def __init__(
+        self,
+        capacity: int = 32,
+        scorer: ImportanceScorer | None = None,
+    ) -> None:
+        if capacity <= 0:
+            raise ValueError("capacity must be positive")
+
+        self.capacity = capacity
+        self.scorer = scorer or KeywordImportanceScorer()
+        self._id_source = count(1)
+        self._frame = pd.DataFrame(columns=self.COLUMNS)
+
+    @property
+    def frame(self) -> pd.DataFrame:
+        """Return a copy so callers cannot mutate the live buffer implicitly."""
+        return self._frame.copy(deep=True)
+
+    def __len__(self) -> int:
+        return len(self._frame.index)
+
+    def add(
+        self,
+        role: str,
+        content: str,
+        *,
+        summary: str = "",
+        tags: list[str] | tuple[str, ...] | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> int:
+        record = InteractionRecord(
+            interaction_id=next(self._id_source),
+            timestamp=datetime.now(timezone.utc),
+            role=role,
+            content=content,
+            summary=summary,
+            tags=tuple(tags or ()),
+            metadata=dict(metadata or {}),
+        )
+
+        row = pd.DataFrame([asdict(record)], columns=self.COLUMNS)
+        self._frame = pd.concat([self._frame, row], ignore_index=True)
+        self._evict_over_capacity()
+        return record.interaction_id
+
+    def recent(self, limit: int = 5) -> pd.DataFrame:
+        if limit <= 0:
+            return self._frame.head(0).copy(deep=True)
+
+        ordered = self._frame.sort_values("interaction_id", ascending=False).head(limit)
+        return ordered.sort_values("interaction_id", ascending=True).reset_index(drop=True)
+
+    def relevant(self, query: str, limit: int = 5) -> pd.DataFrame:
+        if limit <= 0 or self._frame.empty:
+            return self._frame.head(0).copy(deep=True)
+
+        scored = self._frame.copy(deep=True)
+        scored["score"] = scored.apply(lambda row: self.scorer.score(query, row), axis=1)
+        scored = scored.sort_values(["score", "interaction_id"], ascending=[False, False]).head(limit)
+        return scored.reset_index(drop=True)
+
+    def context_window(self, query: str | None = None, limit: int = 5) -> pd.DataFrame:
+        """
+        Return either a recent window or a query-aware relevant window.
+
+        This gives callers a single method for switching between fixed-window and
+        importance-aware retention policies.
+        """
+        if query is None or not query.strip():
+            return self.recent(limit=limit)
+        return self.relevant(query=query, limit=limit)
+
+    def retain_relevant(self, query: str, limit: int | None = None) -> pd.DataFrame:
+        """
+        Compact the live buffer down to the top relevant rows for a query.
+
+        This is useful when you want a strict "working set" rather than keeping the
+        most recent interactions by default.
+        """
+        target_size = self.capacity if limit is None else limit
+        if target_size <= 0:
+            self.clear()
+            return self._frame.copy(deep=True)
+
+        retained = self.relevant(query=query, limit=target_size).sort_values("interaction_id")
+        self._frame = retained[self.COLUMNS].reset_index(drop=True)
+        return self.frame
+
+    def clear(self) -> None:
+        self._frame = pd.DataFrame(columns=self.COLUMNS)
+
+    def find_index_by_metadata(self, field: str, value: Any) -> int | None:
+        """
+        Return the integer index of the first row whose `metadata[field]` equals `value`.
+
+        Used by external mutation paths (e.g., key-based stores layered on top of
+        the buffer) that need to update or delete a specific logical entry without
+        scanning the dataframe themselves.
+        """
+        if self._frame.empty:
+            return None
+        for idx, metadata in enumerate(self._frame["metadata"].tolist()):
+            if isinstance(metadata, dict) and metadata.get(field) == value:
+                return idx
+        return None
+
+    def drop_at(self, index: int) -> None:
+        """
+        Remove the row at the given positional index.
+
+        Pairs with `find_index_by_metadata` to give external code a clean mutation
+        path without reaching into private dataframe internals.
+        """
+        if index < 0 or index >= len(self._frame.index):
+            return
+        self._frame = self._frame.drop(self._frame.index[index]).reset_index(drop=True)
+
+    def iter_rows(self) -> list[dict[str, Any]]:
+        """
+        Return a list of row dicts in insertion order. Use this for read-only
+        scans (e.g., search) instead of poking at the live dataframe.
+        """
+        return self._frame.to_dict("records")
+
+    def _evict_over_capacity(self) -> None:
+        overflow = len(self._frame.index) - self.capacity
+        if overflow <= 0:
+            return
+
+        self._frame = (
+            self._frame.sort_values("interaction_id", ascending=False)
+            .head(self.capacity)
+            .sort_values("interaction_id", ascending=True)
+            .reset_index(drop=True)
+        )
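A short usage sketch of the buffer's eviction and mutation paths; the note strings are illustrative, and which rows `retain_relevant` keeps depends on `KeywordImportanceScorer` (defined in `scoring.py`, not shown in this excerpt):

    from warm_memory import WarmMemoryBuffer

    buffer = WarmMemoryBuffer(capacity=4)
    for i in range(6):
        buffer.add("user", f"note {i}", metadata={"note_id": i})

    assert len(buffer) == 4  # the two oldest rows were evicted on overflow

    # Key-based mutation path intended for stores layered on top of the buffer.
    idx = buffer.find_index_by_metadata("note_id", 4)
    if idx is not None:
        buffer.drop_at(idx)

    # Compact the live buffer to a strict working set for one query.
    buffer.retain_relevant("note 5", limit=2)
    print(buffer.frame[["interaction_id", "content"]])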
warm_memory/decorators.py
ADDED
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import inspect
+from functools import wraps
+from typing import Any, Callable
+
+from .buffer import WarmMemoryBuffer
+
+
+def _stringify_payload(payload: Any) -> str:
+    if payload is None:
+        return ""
+    if isinstance(payload, str):
+        return payload
+    return repr(payload)
+
+
+def remember_interaction(
+    memory: WarmMemoryBuffer,
+    *,
+    input_role: str = "user",
+    output_role: str = "assistant",
+    input_extractor: Callable[[tuple[Any, ...], dict[str, Any]], Any] | None = None,
+    output_extractor: Callable[[Any], Any] | None = None,
+    metadata_factory: Callable[[tuple[Any, ...], dict[str, Any], Any], dict[str, Any]] | None = None,
+) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
+    """
+    Decorate an agent-like function and persist input/output rows in warm memory.
+
+    Defaults:
+    - input is derived from the first non-memory positional argument or `prompt` kwarg
+    - output is stored as the function return value
+    """
+
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        signature = inspect.signature(func)
+
+        def default_input_extractor(args: tuple[Any, ...], kwargs: dict[str, Any]) -> Any:
+            bound = signature.bind_partial(*args, **kwargs)
+            if "prompt" in bound.arguments:
+                return bound.arguments["prompt"]
+            if bound.arguments:
+                first_name = next(iter(bound.arguments))
+                return bound.arguments[first_name]
+            return None
+
+        @wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
+            raw_input = (
+                input_extractor(args, kwargs)
+                if input_extractor is not None
+                else default_input_extractor(args, kwargs)
+            )
+
+            result = func(*args, **kwargs)
+
+            raw_output = output_extractor(result) if output_extractor is not None else result
+            metadata = (
+                metadata_factory(args, kwargs, result)
+                if metadata_factory is not None
+                else {"function": func.__name__}
+            )
+
+            memory.add(role=input_role, content=_stringify_payload(raw_input), metadata=metadata)
+            memory.add(role=output_role, content=_stringify_payload(raw_output), metadata=metadata)
+            return result
+
+        return wrapper
+
+    return decorator
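A sketch of the decorator in use; `echo_agent` and its metadata are hypothetical names for illustration:

    from warm_memory import WarmMemoryBuffer, remember_interaction

    memory = WarmMemoryBuffer(capacity=16)

    @remember_interaction(memory, metadata_factory=lambda args, kwargs, result: {"tool": "echo"})
    def echo_agent(prompt: str) -> str:
        return f"echo: {prompt}"

    echo_agent("hello")
    # Two rows were persisted: a "user" row ("hello") and an
    # "assistant" row ("echo: hello"), both tagged {"tool": "echo"}.
    print(memory.recent(limit=2)[["role", "content"]])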
warm_memory/langgraph/__init__.py
ADDED
@@ -0,0 +1,17 @@
+"""
+LangGraph integration for WarmMemory.
+
+Install with the optional extra:
+
+    pip install "warm-memory[langgraph]"
+"""
+
+from .agent import build_warm_memory_agent
+from .embeddings import EmbeddingsImportanceScorer
+from .store import WarmStore
+
+__all__ = [
+    "WarmStore",
+    "EmbeddingsImportanceScorer",
+    "build_warm_memory_agent",
+]
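Since the integration lives behind an optional extra, callers that only sometimes have LangGraph installed can guard the import; a minimal sketch:

    try:
        from warm_memory.langgraph import WarmStore, build_warm_memory_agent
        HAS_LANGGRAPH = True
    except ImportError:  # extra not installed: pip install "warm-memory[langgraph]"
        HAS_LANGGRAPH = False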
warm_memory/langgraph/agent.py
ADDED
@@ -0,0 +1,137 @@
+"""
+Reusable LangGraph agent builder that wires WarmStore into the request loop.
+
+Pre-call: search the user's namespace for relevant memories and inject them
+into the system message.
+Post-call: write the new (user, assistant) exchange back into the warm store.
+
+Works fully synthetic out of the box (FakeListChatModel + KeywordImportanceScorer).
+Drop in a real chat model and/or EmbeddingsImportanceScorer to turn it into a
+production agent.
+"""
+
+from __future__ import annotations
+
+from typing import Any, Callable, TypedDict
+
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langgraph.graph import END, START, StateGraph
+
+from .store import WarmStore
+
+
+def _flatten_message_content(content: Any) -> str:
+    """
+    Normalize an AIMessage.content value to a plain string.
+
+    Newer LangChain chat models can return `content` as a list of content
+    blocks (e.g., `[{"type": "text", "text": "..."}, ...]`) instead of a
+    plain string. Storing the list repr in warm memory is useless for
+    keyword/embedding scoring, so flatten to text here.
+    """
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for block in content:
+            if isinstance(block, str):
+                parts.append(block)
+            elif isinstance(block, dict):
+                if block.get("type") == "text" and isinstance(block.get("text"), str):
+                    parts.append(block["text"])
+                elif isinstance(block.get("text"), str):
+                    parts.append(block["text"])
+        return "\n".join(parts)
+    return str(content)
+
+
+class WarmAgentState(TypedDict, total=False):
+    query: str
+    namespace: tuple[str, ...]
+    recalled: list[dict[str, Any]]
+    response: str
+
+
+_DEFAULT_SYSTEM = (
+    "You are a helpful assistant with access to warm memory of prior exchanges "
+    "with this user. Use the recalled context if relevant; ignore it if not."
+)
+
+
+def _format_recalled(recalled: list[dict[str, Any]]) -> str:
+    if not recalled:
+        return "(no prior context)"
+    lines = []
+    for entry in recalled:
+        key = entry.get("key", "?")
+        value = entry.get("value", {})
+        lines.append(f"- [{key}] {value}")
+    return "\n".join(lines)
+
+
+def build_warm_memory_agent(
+    *,
+    model: BaseChatModel,
+    store: WarmStore,
+    recall_limit: int = 5,
+    system_prompt: str = _DEFAULT_SYSTEM,
+    namespace_default: tuple[str, ...] = ("default",),
+) -> Callable[[dict[str, Any]], dict[str, Any]]:
+    """
+    Build a compiled LangGraph agent that uses `store` as warm memory.
+
+    Returns a compiled graph. Invoke it with:
+        agent.invoke({"query": "...", "namespace": ("alice",)})
+    """
+
+    def memory_lookup(state: WarmAgentState) -> dict[str, Any]:
+        namespace = state.get("namespace") or namespace_default
+        query = state.get("query", "")
+        if not query:
+            return {"recalled": []}
+        hits = store.search(namespace, query=query, limit=recall_limit)
+        return {
+            "recalled": [
+                {"key": h.key, "value": h.value, "score": h.score} for h in hits
+            ],
+            "namespace": namespace,
+        }
+
+    def respond(state: WarmAgentState) -> dict[str, Any]:
+        recalled = state.get("recalled", []) or []
+        memory_block = _format_recalled(recalled)
+        messages = [
+            SystemMessage(content=f"{system_prompt}\n\nRecalled memory:\n{memory_block}"),
+            HumanMessage(content=state.get("query", "")),
+        ]
+        ai_message = model.invoke(messages)
+        raw_content = ai_message.content if isinstance(ai_message, AIMessage) else ai_message
+        text = _flatten_message_content(raw_content)
+        return {"response": text}
+
+    def memory_write(state: WarmAgentState) -> dict[str, Any]:
+        namespace = state.get("namespace") or namespace_default
+        query = state.get("query", "")
+        response = state.get("response", "")
+        if not query and not response:
+            return {}
+        next_key = store.next_key(namespace, prefix="exchange-")
+        store.put(
+            namespace,
+            next_key,
+            {"user": query, "assistant": response},
+        )
+        return {}
+
+    graph = StateGraph(WarmAgentState)
+    graph.add_node("memory_lookup", memory_lookup)
+    graph.add_node("respond", respond)
+    graph.add_node("memory_write", memory_write)
+
+    graph.add_edge(START, "memory_lookup")
+    graph.add_edge("memory_lookup", "respond")
+    graph.add_edge("respond", "memory_write")
+    graph.add_edge("memory_write", END)
+
+    return graph.compile()
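To close the loop, a sketch of building and invoking the agent with the synthetic model mentioned in the module docstring. WarmStore's constructor is not shown in this excerpt, so the zero-argument construction below is an assumption; the canned reply and namespace are illustrative:

    from langchain_core.language_models import FakeListChatModel

    from warm_memory.langgraph import WarmStore, build_warm_memory_agent

    store = WarmStore()  # assumed default-constructible; signature not shown in this diff
    model = FakeListChatModel(responses=["Billing deploys to eu-west-1."])

    agent = build_warm_memory_agent(model=model, store=store, recall_limit=3)
    out = agent.invoke({"query": "Where does billing deploy?", "namespace": ("alice",)})

    print(out["response"])  # the fake model's canned reply
    # The (user, assistant) exchange is now persisted under an auto-generated
    # "exchange-..." key in the ("alice",) namespace.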