warm-memory 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warm_memory-0.2.1/LICENSE +21 -0
- warm_memory-0.2.1/PKG-INFO +306 -0
- warm_memory-0.2.1/README.md +272 -0
- warm_memory-0.2.1/pyproject.toml +62 -0
- warm_memory-0.2.1/setup.cfg +4 -0
- warm_memory-0.2.1/tests/test_benchmark.py +28 -0
- warm_memory-0.2.1/tests/test_langgraph_agent_and_benchmark.py +61 -0
- warm_memory-0.2.1/tests/test_langgraph_store.py +298 -0
- warm_memory-0.2.1/tests/test_memory.py +68 -0
- warm_memory-0.2.1/warm_memory/__init__.py +14 -0
- warm_memory-0.2.1/warm_memory/benchmark.py +219 -0
- warm_memory-0.2.1/warm_memory/buffer.py +171 -0
- warm_memory-0.2.1/warm_memory/decorators.py +70 -0
- warm_memory-0.2.1/warm_memory/langgraph/__init__.py +17 -0
- warm_memory-0.2.1/warm_memory/langgraph/agent.py +137 -0
- warm_memory-0.2.1/warm_memory/langgraph/benchmark.py +325 -0
- warm_memory-0.2.1/warm_memory/langgraph/embeddings.py +94 -0
- warm_memory-0.2.1/warm_memory/langgraph/store.py +335 -0
- warm_memory-0.2.1/warm_memory/scoring.py +61 -0
- warm_memory-0.2.1/warm_memory/workload.py +35 -0
- warm_memory-0.2.1/warm_memory.egg-info/PKG-INFO +306 -0
- warm_memory-0.2.1/warm_memory.egg-info/SOURCES.txt +23 -0
- warm_memory-0.2.1/warm_memory.egg-info/dependency_links.txt +1 -0
- warm_memory-0.2.1/warm_memory.egg-info/requires.txt +9 -0
- warm_memory-0.2.1/warm_memory.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: warm-memory
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Capacity-bounded warm memory for LLM agents, with a LangGraph BaseStore implementation, embeddings-based importance scoring, and a comparative benchmark.
|
|
5
|
+
Author: Vivek Singh
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/vsingh45/WarmMemory
|
|
8
|
+
Project-URL: Repository, https://github.com/vsingh45/WarmMemory
|
|
9
|
+
Project-URL: Issues, https://github.com/vsingh45/WarmMemory/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/vsingh45/WarmMemory/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,agent,memory,langgraph,langchain,basestore,retrieval,rag,vector-store,short-term-memory,benchmark
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: pandas<3.0,>=2.2.0
|
|
27
|
+
Provides-Extra: langgraph
|
|
28
|
+
Requires-Dist: langgraph<2.0,>=1.0; extra == "langgraph"
|
|
29
|
+
Requires-Dist: langchain-core<2.0,>=1.0; extra == "langgraph"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
32
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# WarmMemory
|
|
36
|
+
|
|
37
|
+
[](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml)
|
|
38
|
+
[](https://www.python.org/)
|
|
39
|
+
[](https://opensource.org/licenses/MIT)
|
|
40
|
+
[](https://langchain-ai.github.io/langgraph/)
|
|
41
|
+
|
|
42
|
+
WarmMemory is a Python package for short-term memory management in LLM agents.
|
|
43
|
+
It adds a small in-process working-memory layer that keeps the most recent or most
|
|
44
|
+
relevant interactions close to the agent, reducing repeated retrieval work and
|
|
45
|
+
helping control prompt growth.
|
|
46
|
+
|
|
47
|
+
The repository provides:
|
|
48
|
+
|
|
49
|
+
- a reusable Python package for warm-memory buffering,
|
|
50
|
+
- a decorator for automatic interaction capture,
|
|
51
|
+
- a pluggable importance scoring interface,
|
|
52
|
+
- a deterministic benchmark for recency vs relevance vs fallback memory policies,
|
|
53
|
+
- a LangGraph `BaseStore` integration with per-namespace eviction, embeddings-based
|
|
54
|
+
ranking, and a pre-built agent,
|
|
55
|
+
- HTML documentation for architecture and usage.
|
|
56
|
+
|
|
57
|
+
## Why This Exists
|
|
58
|
+
|
|
59
|
+
Many agent systems use one of two expensive patterns:
|
|
60
|
+
|
|
61
|
+
- they keep appending conversation history to the prompt,
|
|
62
|
+
- or they query long-term memory on nearly every turn.
|
|
63
|
+
|
|
64
|
+
Both increase latency and cost. WarmMemory introduces a hot path:
|
|
65
|
+
|
|
66
|
+
- keep a small working set in RAM,
|
|
67
|
+
- retrieve from that working set first,
|
|
68
|
+
- fall back to longer-term retrieval only when needed,
|
|
69
|
+
- and send only a compact context window to the model.
|
|
70
|
+
|
|
71
|
+
## Core Ideas
|
|
72
|
+
|
|
73
|
+
### 1. Sliding-Window Memory
|
|
74
|
+
|
|
75
|
+
The system can keep the last `N` interactions using `recent(k)`.
|
|
76
|
+
|
|
77
|
+
### 2. Relevance-Aware Memory
|
|
78
|
+
|
|
79
|
+
Instead of only keeping the latest messages, the system can rank rows against the
|
|
80
|
+
current query using `relevant(query, k)` and compact the active working set with
|
|
81
|
+
`retain_relevant(query, k)`.
|
|
82
|
+
|
|
83
|
+
### 3. Automatic Agent Capture
|
|
84
|
+
|
|
85
|
+
The `@remember_interaction` decorator records agent inputs and outputs without forcing
|
|
86
|
+
changes into the core agent logic.
|
|
87
|
+
|
|
88
|
+
### 4. Two-Tier Memory Architecture
|
|
89
|
+
|
|
90
|
+
The benchmark models a practical split:
|
|
91
|
+
|
|
92
|
+
- warm memory for fast in-process access,
|
|
93
|
+
- long-term memory for slower fallback retrieval.
|
|
94
|
+
|
|
95
|
+
## Repository Layout
|
|
96
|
+
|
|
97
|
+
- `warm_memory/`: package source code
|
|
98
|
+
- `warm_memory/buffer.py`: Pandas-backed warm-memory store
|
|
99
|
+
- `warm_memory/scoring.py`: scoring interface and default heuristic scorer
|
|
100
|
+
- `warm_memory/decorators.py`: function decorator for interaction capture
|
|
101
|
+
- `warm_memory/benchmark.py`: deterministic benchmark harness
|
|
102
|
+
- `warm_memory/workload.py`: synthetic workload for evaluation
|
|
103
|
+
- `warm_memory/langgraph/`: LangGraph integration (optional extra)
|
|
104
|
+
- `store.py`: `WarmStore(BaseStore)` with per-namespace eviction
|
|
105
|
+
- `embeddings.py`: bring-your-own embeddings scorer
|
|
106
|
+
- `agent.py`: pre-built `build_warm_memory_agent` graph
|
|
107
|
+
- `benchmark.py`: full-history vs vector-only vs warm-fallback benchmark
|
|
108
|
+
- `examples/langgraph_warm_agent.py`: runnable LangGraph agent example
|
|
109
|
+
- `scripts/run_benchmark.py`: legacy benchmark entrypoint
|
|
110
|
+
- `scripts/run_langgraph_benchmark.py`: LangGraph-based benchmark entrypoint
|
|
111
|
+
- `reports/warm_memory_benchmark.md`: legacy benchmark output
|
|
112
|
+
- `reports/warm_memory_langgraph_benchmark.md`: LangGraph benchmark output
|
|
113
|
+
- `docs/warm_memory_guide.html`: public-facing HTML documentation
|
|
114
|
+
- `tests/`: unit tests
|
|
115
|
+
|
|
116
|
+
## Installation
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
python3 -m pip install -e .
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Quick Start
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from warm_memory import WarmMemoryBuffer, remember_interaction
|
|
126
|
+
|
|
127
|
+
memory = WarmMemoryBuffer(capacity=8)
|
|
128
|
+
|
|
129
|
+
@remember_interaction(memory)
|
|
130
|
+
def agent(prompt: str) -> str:
|
|
131
|
+
if "billing" in prompt.lower():
|
|
132
|
+
return "Your invoice is available in the billing portal."
|
|
133
|
+
return f"Echo: {prompt}"
|
|
134
|
+
|
|
135
|
+
agent("How do I reset my password?")
|
|
136
|
+
agent("Where is my billing invoice?")
|
|
137
|
+
|
|
138
|
+
recent_rows = memory.recent(4)
|
|
139
|
+
relevant_rows = memory.relevant("invoice", limit=2)
|
|
140
|
+
memory.retain_relevant("invoice", limit=4)
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Example Usage Pattern
|
|
144
|
+
|
|
145
|
+
Use WarmMemory in front of a larger memory system:
|
|
146
|
+
|
|
147
|
+
1. Receive a new user query.
|
|
148
|
+
2. Search the warm buffer first.
|
|
149
|
+
3. If warm memory is sufficient, build a compact prompt from those rows.
|
|
150
|
+
4. If warm memory is weak, fall back to long-term retrieval.
|
|
151
|
+
5. Write the new interaction back into warm memory.
|
|
152
|
+
|
|
153
|
+
This pattern is useful for:
|
|
154
|
+
|
|
155
|
+
- coding agents,
|
|
156
|
+
- research assistants,
|
|
157
|
+
- task-oriented copilots,
|
|
158
|
+
- customer support agents,
|
|
159
|
+
- and any multi-turn system with repeated local context.
|
|
160
|
+
|
|
161
|
+
## Benchmark
|
|
162
|
+
|
|
163
|
+
The repository includes a deterministic benchmark that compares:
|
|
164
|
+
|
|
165
|
+
- `recency`: always use the latest warm-memory rows,
|
|
166
|
+
- `relevance`: rank and retain the top relevant warm-memory rows,
|
|
167
|
+
- `fallback`: use warm relevance first, then long-term retrieval on misses.
|
|
168
|
+
|
|
169
|
+
Run it with:
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
python3 scripts/run_benchmark.py
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
This writes a report to `reports/warm_memory_benchmark.md`.
|
|
176
|
+
|
|
177
|
+
On the current synthetic workload, the tradeoff looks like this:
|
|
178
|
+
|
|
179
|
+
- `recency` is the fastest policy,
|
|
180
|
+
- `fallback` is the most accurate policy,
|
|
181
|
+
- `relevance` sits between the two and provides a cleaner hot working set.
|
|
182
|
+
|
|
183
|
+
The benchmark is designed to surface that tradeoff rather than name a single
|
|
184
|
+
winner: each policy occupies a different point on the latency-accuracy curve.
|
|
185
|
+
|
|
186
|
+
## Documentation
|
|
187
|
+
|
|
188
|
+
- HTML guide: `docs/warm_memory_guide.html`
|
|
189
|
+
- Benchmark report: `reports/warm_memory_benchmark.md`
|
|
190
|
+
- README visual: `docs/warm_memory_architecture.svg`
|
|
191
|
+
|
|
192
|
+
The HTML guide explains:
|
|
193
|
+
|
|
194
|
+
- how the architecture works,
|
|
195
|
+
- where latency is saved,
|
|
196
|
+
- how to use the package,
|
|
197
|
+
- and how the components fit together.
|
|
198
|
+
|
|
199
|
+
## Architecture Preview
|
|
200
|
+
|
|
201
|
+

|
|
202
|
+
|
|
203
|
+
For a richer visual walkthrough, open `docs/warm_memory_guide.html` locally or publish it with GitHub Pages.
|
|
204
|
+
|
|
205
|
+
## Development
|
|
206
|
+
|
|
207
|
+
Run tests:
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
python3 -m unittest discover -s tests -v
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## LangGraph Integration
|
|
214
|
+
|
|
215
|
+
WarmMemory ships an optional `warm_memory.langgraph` module that plugs directly
|
|
216
|
+
into the LangGraph ecosystem. Install the extra:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
python3 -m pip install -e ".[langgraph]"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Drop-in `BaseStore`
|
|
223
|
+
|
|
224
|
+
`WarmStore` implements LangGraph's `BaseStore` interface with **per-namespace
|
|
225
|
+
warm buffers** — each namespace gets its own bounded buffer, so multi-tenant
|
|
226
|
+
agents don't evict each other's memory.
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from warm_memory.langgraph import WarmStore
|
|
230
|
+
|
|
231
|
+
store = WarmStore(capacity=16)
|
|
232
|
+
store.put(("alice",), "preferences", {"text": "wants concise answers"})
|
|
233
|
+
store.put(("alice",), "billing", {"text": "invoice overdue", "topic": "billing"})
|
|
234
|
+
|
|
235
|
+
# query-based recall (keyword scorer by default)
|
|
236
|
+
hits = store.search(("alice",), query="how do I pay my invoice?")
|
|
237
|
+
|
|
238
|
+
# filter operators: $eq, $ne, $gt, $gte, $lt, $lte
|
|
239
|
+
billing = store.search(("alice",), filter={"topic": "billing"})
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
### Bring-your-own embeddings
|
|
243
|
+
|
|
244
|
+
Swap the default keyword scorer for any LangChain `Embeddings`:
|
|
245
|
+
|
|
246
|
+
```python
|
|
247
|
+
from langchain_openai import OpenAIEmbeddings
|
|
248
|
+
from warm_memory.langgraph import EmbeddingsImportanceScorer, WarmStore
|
|
249
|
+
|
|
250
|
+
scorer = EmbeddingsImportanceScorer(OpenAIEmbeddings())
|
|
251
|
+
store = WarmStore(scorer=scorer)
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
Works with any LangChain embeddings provider — OpenAI, HuggingFace, Voyage,
|
|
255
|
+
Anthropic — or `DeterministicFakeEmbedding` for tests.
|
|
256
|
+
|
|
257
|
+
### Pre-built agent
|
|
258
|
+
|
|
259
|
+
`build_warm_memory_agent` returns a compiled LangGraph that reads warm memory
|
|
260
|
+
before responding and writes the new exchange back on the way out:
|
|
261
|
+
|
|
262
|
+
```python
|
|
263
|
+
from warm_memory.langgraph import WarmStore, build_warm_memory_agent
|
|
264
|
+
|
|
265
|
+
store = WarmStore(capacity=8)
|
|
266
|
+
agent = build_warm_memory_agent(model=my_chat_model, store=store)
|
|
267
|
+
agent.invoke({"query": "Where's my invoice?", "namespace": ("alice",)})
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
A runnable example using `FakeListChatModel` (no API keys) lives at
|
|
271
|
+
`examples/langgraph_warm_agent.py`.
|
|
272
|
+
|
|
273
|
+
### Comparative benchmark
|
|
274
|
+
|
|
275
|
+
`scripts/run_langgraph_benchmark.py` compares three retrieval strategies through
|
|
276
|
+
the LangGraph store API:
|
|
277
|
+
|
|
278
|
+
- `full-history`: every prior turn in the prompt (naive baseline)
|
|
279
|
+
- `vector-only`: LangGraph's `InMemoryStore` with an embedding index
|
|
280
|
+
- `warm-fallback`: `WarmStore` in front of the vector store
|
|
281
|
+
|
|
282
|
+
```bash
|
|
283
|
+
python3 scripts/run_langgraph_benchmark.py
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
This writes `reports/warm_memory_langgraph_benchmark.md`. It runs with synthetic
|
|
287
|
+
embeddings by default; set `WARM_BENCH_EMBEDDINGS=openai` (and `OPENAI_API_KEY`)
|
|
288
|
+
to compare against real semantic search.
|
|
289
|
+
|
|
290
|
+
## Roadmap
|
|
291
|
+
|
|
292
|
+
- ~~add an embedding-based or reranker-based importance scorer~~ (done via
|
|
293
|
+
`EmbeddingsImportanceScorer`)
|
|
294
|
+
- ~~compare against vector-store-first baselines~~ (done via
|
|
295
|
+
`warm-fallback` strategy in the LangGraph benchmark)
|
|
296
|
+
- benchmark against real agent traces instead of only synthetic workloads
|
|
297
|
+
- record actual model latency and token usage from a live LLM pipeline
|
|
298
|
+
- add charts and experiment summaries for publication-style reporting
|
|
299
|
+
- TTL support for the LangGraph `BaseStore`
|
|
300
|
+
- publish `warm-memory` to PyPI and propose inclusion in LangGraph's third-party
|
|
301
|
+
store list
|
|
302
|
+
|
|
303
|
+
## License
|
|
304
|
+
|
|
305
|
+
This project is released under the MIT License. See `LICENSE`.
|
|
306
|
+
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# WarmMemory
|
|
2
|
+
|
|
3
|
+
[](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml)
|
|
4
|
+
[](https://www.python.org/)
|
|
5
|
+
[](https://opensource.org/licenses/MIT)
|
|
6
|
+
[](https://langchain-ai.github.io/langgraph/)
|
|
7
|
+
|
|
8
|
+
WarmMemory is a Python package for short-term memory management in LLM agents.
|
|
9
|
+
It adds a small in-process working-memory layer that keeps the most recent or most
|
|
10
|
+
relevant interactions close to the agent, reducing repeated retrieval work and
|
|
11
|
+
helping control prompt growth.
|
|
12
|
+
|
|
13
|
+
The repository provides:
|
|
14
|
+
|
|
15
|
+
- a reusable Python package for warm-memory buffering,
|
|
16
|
+
- a decorator for automatic interaction capture,
|
|
17
|
+
- a pluggable importance scoring interface,
|
|
18
|
+
- a deterministic benchmark for recency vs relevance vs fallback memory policies,
|
|
19
|
+
- a LangGraph `BaseStore` integration with per-namespace eviction, embeddings-based
|
|
20
|
+
ranking, and a pre-built agent,
|
|
21
|
+
- HTML documentation for architecture and usage.
|
|
22
|
+
|
|
23
|
+
## Why This Exists
|
|
24
|
+
|
|
25
|
+
Many agent systems use one of two expensive patterns:
|
|
26
|
+
|
|
27
|
+
- they keep appending conversation history to the prompt,
|
|
28
|
+
- or they query long-term memory on nearly every turn.
|
|
29
|
+
|
|
30
|
+
Both increase latency and cost. WarmMemory introduces a hot path:
|
|
31
|
+
|
|
32
|
+
- keep a small working set in RAM,
|
|
33
|
+
- retrieve from that working set first,
|
|
34
|
+
- fall back to longer-term retrieval only when needed,
|
|
35
|
+
- and send only a compact context window to the model.
|
|
36
|
+
|
|
37
|
+
## Core Ideas
|
|
38
|
+
|
|
39
|
+
### 1. Sliding-Window Memory
|
|
40
|
+
|
|
41
|
+
The system can keep the last `N` interactions using `recent(k)`.
|
|
42
|
+
|
|
43
|
+
### 2. Relevance-Aware Memory
|
|
44
|
+
|
|
45
|
+
Instead of only keeping the latest messages, the system can rank rows against the
|
|
46
|
+
current query using `relevant(query, k)` and compact the active working set with
|
|
47
|
+
`retain_relevant(query, k)`.
|
|
48
|
+
|
|
49
|
+
### 3. Automatic Agent Capture
|
|
50
|
+
|
|
51
|
+
The `@remember_interaction` decorator records agent inputs and outputs without forcing
|
|
52
|
+
changes into the core agent logic.
|
|
53
|
+
|
|
54
|
+
### 4. Two-Tier Memory Architecture
|
|
55
|
+
|
|
56
|
+
The benchmark models a practical split:
|
|
57
|
+
|
|
58
|
+
- warm memory for fast in-process access,
|
|
59
|
+
- long-term memory for slower fallback retrieval.
|
|
60
|
+
|
|
61
|
+
## Repository Layout
|
|
62
|
+
|
|
63
|
+
- `warm_memory/`: package source code
|
|
64
|
+
- `warm_memory/buffer.py`: Pandas-backed warm-memory store
|
|
65
|
+
- `warm_memory/scoring.py`: scoring interface and default heuristic scorer
|
|
66
|
+
- `warm_memory/decorators.py`: function decorator for interaction capture
|
|
67
|
+
- `warm_memory/benchmark.py`: deterministic benchmark harness
|
|
68
|
+
- `warm_memory/workload.py`: synthetic workload for evaluation
|
|
69
|
+
- `warm_memory/langgraph/`: LangGraph integration (optional extra)
|
|
70
|
+
- `store.py`: `WarmStore(BaseStore)` with per-namespace eviction
|
|
71
|
+
- `embeddings.py`: bring-your-own embeddings scorer
|
|
72
|
+
- `agent.py`: pre-built `build_warm_memory_agent` graph
|
|
73
|
+
- `benchmark.py`: full-history vs vector-only vs warm-fallback benchmark
|
|
74
|
+
- `examples/langgraph_warm_agent.py`: runnable LangGraph agent example
|
|
75
|
+
- `scripts/run_benchmark.py`: legacy benchmark entrypoint
|
|
76
|
+
- `scripts/run_langgraph_benchmark.py`: LangGraph-based benchmark entrypoint
|
|
77
|
+
- `reports/warm_memory_benchmark.md`: legacy benchmark output
|
|
78
|
+
- `reports/warm_memory_langgraph_benchmark.md`: LangGraph benchmark output
|
|
79
|
+
- `docs/warm_memory_guide.html`: public-facing HTML documentation
|
|
80
|
+
- `tests/`: unit tests
|
|
81
|
+
|
|
82
|
+
## Installation
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
python3 -m pip install -e .
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Quick Start
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from warm_memory import WarmMemoryBuffer, remember_interaction
|
|
92
|
+
|
|
93
|
+
memory = WarmMemoryBuffer(capacity=8)
|
|
94
|
+
|
|
95
|
+
@remember_interaction(memory)
|
|
96
|
+
def agent(prompt: str) -> str:
|
|
97
|
+
if "billing" in prompt.lower():
|
|
98
|
+
return "Your invoice is available in the billing portal."
|
|
99
|
+
return f"Echo: {prompt}"
|
|
100
|
+
|
|
101
|
+
agent("How do I reset my password?")
|
|
102
|
+
agent("Where is my billing invoice?")
|
|
103
|
+
|
|
104
|
+
recent_rows = memory.recent(4)
|
|
105
|
+
relevant_rows = memory.relevant("invoice", limit=2)
|
|
106
|
+
memory.retain_relevant("invoice", limit=4)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Example Usage Pattern
|
|
110
|
+
|
|
111
|
+
Use WarmMemory in front of a larger memory system:
|
|
112
|
+
|
|
113
|
+
1. Receive a new user query.
|
|
114
|
+
2. Search the warm buffer first.
|
|
115
|
+
3. If warm memory is sufficient, build a compact prompt from those rows.
|
|
116
|
+
4. If warm memory is weak, fall back to long-term retrieval.
|
|
117
|
+
5. Write the new interaction back into warm memory.
|
|
118
|
+
|
|
119
|
+
This pattern is useful for:
|
|
120
|
+
|
|
121
|
+
- coding agents,
|
|
122
|
+
- research assistants,
|
|
123
|
+
- task-oriented copilots,
|
|
124
|
+
- customer support agents,
|
|
125
|
+
- and any multi-turn system with repeated local context.
|
|
126
|
+
|
|
127
|
+
## Benchmark
|
|
128
|
+
|
|
129
|
+
The repository includes a deterministic benchmark that compares:
|
|
130
|
+
|
|
131
|
+
- `recency`: always use the latest warm-memory rows,
|
|
132
|
+
- `relevance`: rank and retain the top relevant warm-memory rows,
|
|
133
|
+
- `fallback`: use warm relevance first, then long-term retrieval on misses.
|
|
134
|
+
|
|
135
|
+
Run it with:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
python3 scripts/run_benchmark.py
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
This writes a report to `reports/warm_memory_benchmark.md`.
|
|
142
|
+
|
|
143
|
+
On the current synthetic workload, the tradeoff looks like this:
|
|
144
|
+
|
|
145
|
+
- `recency` is the fastest policy,
|
|
146
|
+
- `fallback` is the most accurate policy,
|
|
147
|
+
- `relevance` sits between the two and provides a cleaner hot working set.
|
|
148
|
+
|
|
149
|
+
The benchmark is designed to surface that tradeoff rather than name a single
|
|
150
|
+
winner: each policy occupies a different point on the latency-accuracy curve.
|
|
151
|
+
|
|
152
|
+
## Documentation
|
|
153
|
+
|
|
154
|
+
- HTML guide: `docs/warm_memory_guide.html`
|
|
155
|
+
- Benchmark report: `reports/warm_memory_benchmark.md`
|
|
156
|
+
- README visual: `docs/warm_memory_architecture.svg`
|
|
157
|
+
|
|
158
|
+
The HTML guide explains:
|
|
159
|
+
|
|
160
|
+
- how the architecture works,
|
|
161
|
+
- where latency is saved,
|
|
162
|
+
- how to use the package,
|
|
163
|
+
- and how the components fit together.
|
|
164
|
+
|
|
165
|
+
## Architecture Preview
|
|
166
|
+
|
|
167
|
+

|
|
168
|
+
|
|
169
|
+
For a richer visual walkthrough, open `docs/warm_memory_guide.html` locally or publish it with GitHub Pages.
|
|
170
|
+
|
|
171
|
+
## Development
|
|
172
|
+
|
|
173
|
+
Run tests:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python3 -m unittest discover -s tests -v
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## LangGraph Integration
|
|
180
|
+
|
|
181
|
+
WarmMemory ships an optional `warm_memory.langgraph` module that plugs directly
|
|
182
|
+
into the LangGraph ecosystem. Install the extra:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
python3 -m pip install -e ".[langgraph]"
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Drop-in `BaseStore`
|
|
189
|
+
|
|
190
|
+
`WarmStore` implements LangGraph's `BaseStore` interface with **per-namespace
|
|
191
|
+
warm buffers** — each namespace gets its own bounded buffer, so multi-tenant
|
|
192
|
+
agents don't evict each other's memory.
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from warm_memory.langgraph import WarmStore
|
|
196
|
+
|
|
197
|
+
store = WarmStore(capacity=16)
|
|
198
|
+
store.put(("alice",), "preferences", {"text": "wants concise answers"})
|
|
199
|
+
store.put(("alice",), "billing", {"text": "invoice overdue", "topic": "billing"})
|
|
200
|
+
|
|
201
|
+
# query-based recall (keyword scorer by default)
|
|
202
|
+
hits = store.search(("alice",), query="how do I pay my invoice?")
|
|
203
|
+
|
|
204
|
+
# filter operators: $eq, $ne, $gt, $gte, $lt, $lte
|
|
205
|
+
billing = store.search(("alice",), filter={"topic": "billing"})
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Bring-your-own embeddings
|
|
209
|
+
|
|
210
|
+
Swap the default keyword scorer for any LangChain `Embeddings`:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from langchain_openai import OpenAIEmbeddings
|
|
214
|
+
from warm_memory.langgraph import EmbeddingsImportanceScorer, WarmStore
|
|
215
|
+
|
|
216
|
+
scorer = EmbeddingsImportanceScorer(OpenAIEmbeddings())
|
|
217
|
+
store = WarmStore(scorer=scorer)
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
Works with any LangChain embeddings provider — OpenAI, HuggingFace, Voyage,
|
|
221
|
+
Anthropic — or `DeterministicFakeEmbedding` for tests.
|
|
222
|
+
|
|
223
|
+
### Pre-built agent
|
|
224
|
+
|
|
225
|
+
`build_warm_memory_agent` returns a compiled LangGraph that reads warm memory
|
|
226
|
+
before responding and writes the new exchange back on the way out:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from warm_memory.langgraph import WarmStore, build_warm_memory_agent
|
|
230
|
+
|
|
231
|
+
store = WarmStore(capacity=8)
|
|
232
|
+
agent = build_warm_memory_agent(model=my_chat_model, store=store)
|
|
233
|
+
agent.invoke({"query": "Where's my invoice?", "namespace": ("alice",)})
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
A runnable example using `FakeListChatModel` (no API keys) lives at
|
|
237
|
+
`examples/langgraph_warm_agent.py`.
|
|
238
|
+
|
|
239
|
+
### Comparative benchmark
|
|
240
|
+
|
|
241
|
+
`scripts/run_langgraph_benchmark.py` compares three retrieval strategies through
|
|
242
|
+
the LangGraph store API:
|
|
243
|
+
|
|
244
|
+
- `full-history`: every prior turn in the prompt (naive baseline)
|
|
245
|
+
- `vector-only`: LangGraph's `InMemoryStore` with an embedding index
|
|
246
|
+
- `warm-fallback`: `WarmStore` in front of the vector store
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
python3 scripts/run_langgraph_benchmark.py
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
This writes `reports/warm_memory_langgraph_benchmark.md`. It runs with synthetic
|
|
253
|
+
embeddings by default; set `WARM_BENCH_EMBEDDINGS=openai` (and `OPENAI_API_KEY`)
|
|
254
|
+
to compare against real semantic search.
|
|
255
|
+
|
|
256
|
+
## Roadmap
|
|
257
|
+
|
|
258
|
+
- ~~add an embedding-based or reranker-based importance scorer~~ (done via
|
|
259
|
+
`EmbeddingsImportanceScorer`)
|
|
260
|
+
- ~~compare against vector-store-first baselines~~ (done via
|
|
261
|
+
`warm-fallback` strategy in the LangGraph benchmark)
|
|
262
|
+
- benchmark against real agent traces instead of only synthetic workloads
|
|
263
|
+
- record actual model latency and token usage from a live LLM pipeline
|
|
264
|
+
- add charts and experiment summaries for publication-style reporting
|
|
265
|
+
- TTL support for the LangGraph `BaseStore`
|
|
266
|
+
- publish `warm-memory` to PyPI and propose inclusion in LangGraph's third-party
|
|
267
|
+
store list
|
|
268
|
+
|
|
269
|
+
## License
|
|
270
|
+
|
|
271
|
+
This project is released under the MIT License. See `LICENSE`.
|
|
272
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "warm-memory"
|
|
7
|
+
version = "0.2.1"
|
|
8
|
+
description = "Capacity-bounded warm memory for LLM agents, with a LangGraph BaseStore implementation, embeddings-based importance scoring, and a comparative benchmark."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Vivek Singh" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"llm",
|
|
17
|
+
"agent",
|
|
18
|
+
"memory",
|
|
19
|
+
"langgraph",
|
|
20
|
+
"langchain",
|
|
21
|
+
"basestore",
|
|
22
|
+
"retrieval",
|
|
23
|
+
"rag",
|
|
24
|
+
"vector-store",
|
|
25
|
+
"short-term-memory",
|
|
26
|
+
"benchmark",
|
|
27
|
+
]
|
|
28
|
+
classifiers = [
|
|
29
|
+
"Development Status :: 4 - Beta",
|
|
30
|
+
"Intended Audience :: Developers",
|
|
31
|
+
"Intended Audience :: Science/Research",
|
|
32
|
+
"Operating System :: OS Independent",
|
|
33
|
+
"Programming Language :: Python :: 3",
|
|
34
|
+
"Programming Language :: Python :: 3.11",
|
|
35
|
+
"Programming Language :: Python :: 3.12",
|
|
36
|
+
"Programming Language :: Python :: 3.13",
|
|
37
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
38
|
+
"Topic :: Software Development :: Libraries",
|
|
39
|
+
"Typing :: Typed",
|
|
40
|
+
]
|
|
41
|
+
dependencies = [
|
|
42
|
+
"pandas>=2.2.0,<3.0",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.optional-dependencies]
|
|
46
|
+
langgraph = [
|
|
47
|
+
"langgraph>=1.0,<2.0",
|
|
48
|
+
"langchain-core>=1.0,<2.0",
|
|
49
|
+
]
|
|
50
|
+
dev = [
|
|
51
|
+
"build>=1.2",
|
|
52
|
+
"twine>=5.0",
|
|
53
|
+
]
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
Homepage = "https://github.com/vsingh45/WarmMemory"
|
|
57
|
+
Repository = "https://github.com/vsingh45/WarmMemory"
|
|
58
|
+
Issues = "https://github.com/vsingh45/WarmMemory/issues"
|
|
59
|
+
Changelog = "https://github.com/vsingh45/WarmMemory/blob/main/CHANGELOG.md"
|
|
60
|
+
|
|
61
|
+
[tool.setuptools.packages.find]
|
|
62
|
+
include = ["warm_memory*"]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
import unittest
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from warm_memory.benchmark import BenchmarkConfig, run_benchmark
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BenchmarkTests(unittest.TestCase):
    """End-to-end check of the legacy benchmark harness."""

    def test_benchmark_runs_all_strategies_and_writes_report(self) -> None:
        """All three memory policies run and a markdown report is written."""
        with tempfile.TemporaryDirectory() as tmpdir:
            destination = Path(tmpdir) / "report.md"
            settings = BenchmarkConfig(capacity=6, top_k=4, long_term_limit=6)
            results = run_benchmark(config=settings, report_path=destination)

            # Every memory policy must be represented in the result mapping.
            self.assertEqual(set(results), {"recency", "relevance", "fallback"})

            # The harness persists a human-readable markdown report.
            self.assertTrue(destination.exists())
            report_text = destination.read_text(encoding="utf-8")
            self.assertIn("WarmMemory Benchmark Report", report_text)

            # Each per-policy result carries a non-empty turn log plus
            # the latency and accuracy summary metrics.
            for outcome in results.values():
                self.assertGreater(len(outcome.turn_log), 0)
                self.assertIn("avg_end_to_end_ms", outcome.summary)
                self.assertIn("answer_accuracy", outcome.summary)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Allow running this test module directly: python tests/test_benchmark.py
if __name__ == "__main__":
    unittest.main()
|