warm-memory 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,306 @@
1
+ Metadata-Version: 2.4
2
+ Name: warm-memory
3
+ Version: 0.2.1
4
+ Summary: Capacity-bounded warm memory for LLM agents, with a LangGraph BaseStore implementation, embeddings-based importance scoring, and a comparative benchmark.
5
+ Author: Vivek Singh
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/vsingh45/WarmMemory
8
+ Project-URL: Repository, https://github.com/vsingh45/WarmMemory
9
+ Project-URL: Issues, https://github.com/vsingh45/WarmMemory/issues
10
+ Project-URL: Changelog, https://github.com/vsingh45/WarmMemory/blob/main/CHANGELOG.md
11
+ Keywords: llm,agent,memory,langgraph,langchain,basestore,retrieval,rag,vector-store,short-term-memory,benchmark
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.11
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: pandas<3.0,>=2.2.0
27
+ Provides-Extra: langgraph
28
+ Requires-Dist: langgraph<2.0,>=1.0; extra == "langgraph"
29
+ Requires-Dist: langchain-core<2.0,>=1.0; extra == "langgraph"
30
+ Provides-Extra: dev
31
+ Requires-Dist: build>=1.2; extra == "dev"
32
+ Requires-Dist: twine>=5.0; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # WarmMemory
36
+
37
+ [![CI](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml/badge.svg)](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml)
38
+ [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13-blue)](https://www.python.org/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
40
+ [![LangGraph](https://img.shields.io/badge/LangGraph-BaseStore-1f6feb)](https://langchain-ai.github.io/langgraph/)
41
+
42
+ WarmMemory is a Python package for short-term memory management in LLM agents.
43
+ It adds a small in-process working-memory layer that keeps the most recent or most
44
+ relevant interactions close to the agent, reducing repeated retrieval work and
45
+ helping control prompt growth.
46
+
47
+ The repository provides:
48
+
49
+ - a reusable Python package for warm-memory buffering,
50
+ - a decorator for automatic interaction capture,
51
+ - a pluggable importance scoring interface,
52
+ - a deterministic benchmark for recency vs relevance vs fallback memory policies,
53
+ - a LangGraph `BaseStore` integration with per-namespace eviction, embeddings-based
54
+ ranking, and a pre-built agent,
55
+ - HTML documentation for architecture and usage.
56
+
57
+ ## Why This Exists
58
+
59
+ Many agent systems use one of two expensive patterns:
60
+
61
+ - they keep appending conversation history to the prompt,
62
+ - or they query long-term memory on nearly every turn.
63
+
64
+ Both increase latency and cost. WarmMemory introduces a hot path:
65
+
66
+ - keep a small working set in RAM,
67
+ - retrieve from that working set first,
68
+ - fall back to longer-term retrieval only when needed,
69
+ - and send only a compact context window to the model.
70
+
71
+ ## Core Ideas
72
+
73
+ ### 1. Sliding-Window Memory
74
+
75
+ The system can keep the last `N` interactions using `recent(k)`.
76
+
77
+ ### 2. Relevance-Aware Memory
78
+
79
+ Instead of only keeping the latest messages, the system can rank rows against the
80
+ current query using `relevant(query, k)` and compact the active working set with
81
+ `retain_relevant(query, k)`.
82
+
83
+ ### 3. Automatic Agent Capture
84
+
85
+ The `@remember_interaction` decorator records agent inputs and outputs without forcing
86
+ changes into the core agent logic.
87
+
88
+ ### 4. Two-Tier Memory Architecture
89
+
90
+ The benchmark models a practical split:
91
+
92
+ - warm memory for fast in-process access,
93
+ - long-term memory for slower fallback retrieval.
94
+
95
+ ## Repository Layout
96
+
97
+ - `warm_memory/`: package source code
98
+ - `warm_memory/buffer.py`: Pandas-backed warm-memory store
99
+ - `warm_memory/scoring.py`: scoring interface and default heuristic scorer
100
+ - `warm_memory/decorators.py`: function decorator for interaction capture
101
+ - `warm_memory/benchmark.py`: deterministic benchmark harness
102
+ - `warm_memory/workload.py`: synthetic workload for evaluation
103
+ - `warm_memory/langgraph/`: LangGraph integration (optional extra)
104
+ - `store.py`: `WarmStore(BaseStore)` with per-namespace eviction
105
+ - `embeddings.py`: bring-your-own embeddings scorer
106
+ - `agent.py`: pre-built `build_warm_memory_agent` graph
107
+ - `benchmark.py`: full-history vs vector-only vs warm-fallback benchmark
108
+ - `examples/langgraph_warm_agent.py`: runnable LangGraph agent example
109
+ - `scripts/run_benchmark.py`: legacy benchmark entrypoint
110
+ - `scripts/run_langgraph_benchmark.py`: LangGraph-based benchmark entrypoint
111
+ - `reports/warm_memory_benchmark.md`: legacy benchmark output
112
+ - `reports/warm_memory_langgraph_benchmark.md`: LangGraph benchmark output
113
+ - `docs/warm_memory_guide.html`: public-facing HTML documentation
114
+ - `tests/`: unit tests
115
+
116
+ ## Installation
117
+
118
+ ```bash
119
+ python3 -m pip install -e .
120
+ ```
121
+
122
+ ## Quick Start
123
+
124
+ ```python
125
+ from warm_memory import WarmMemoryBuffer, remember_interaction
126
+
127
+ memory = WarmMemoryBuffer(capacity=8)
128
+
129
+ @remember_interaction(memory)
130
+ def agent(prompt: str) -> str:
131
+ if "billing" in prompt.lower():
132
+ return "Your invoice is available in the billing portal."
133
+ return f"Echo: {prompt}"
134
+
135
+ agent("How do I reset my password?")
136
+ agent("Where is my billing invoice?")
137
+
138
+ recent_rows = memory.recent(4)
139
+ relevant_rows = memory.relevant("invoice", limit=2)
140
+ memory.retain_relevant("invoice", limit=4)
141
+ ```
142
+
143
+ ## Example Usage Pattern
144
+
145
+ Use WarmMemory in front of a larger memory system:
146
+
147
+ 1. Receive a new user query.
148
+ 2. Search the warm buffer first.
149
+ 3. If warm memory is sufficient, build a compact prompt from those rows.
150
+ 4. If warm memory is weak, fall back to long-term retrieval.
151
+ 5. Write the new interaction back into warm memory.
152
+
153
+ This pattern is useful for:
154
+
155
+ - coding agents,
156
+ - research assistants,
157
+ - task-oriented copilots,
158
+ - customer support agents,
159
+ - and any multi-turn system with repeated local context.
160
+
161
+ ## Benchmark
162
+
163
+ The repository includes a deterministic benchmark that compares:
164
+
165
+ - `recency`: always use the latest warm-memory rows,
166
+ - `relevance`: rank and retain the top relevant warm-memory rows,
167
+ - `fallback`: use warm relevance first, then long-term retrieval on misses.
168
+
169
+ Run it with:
170
+
171
+ ```bash
172
+ python3 scripts/run_benchmark.py
173
+ ```
174
+
175
+ This writes a report to `reports/warm_memory_benchmark.md`.
176
+
177
+ On the current synthetic workload, the tradeoff looks like this:
178
+
179
+ - `recency` is the fastest policy,
180
+ - `fallback` is the most accurate policy,
181
+ - `relevance` sits between the two and provides a cleaner hot working set.
182
+
183
+ The benchmark is designed to surface that tradeoff rather than name a single
184
+ winner: each policy occupies a different point on the latency-accuracy curve.
185
+
186
+ ## Documentation
187
+
188
+ - HTML guide: `docs/warm_memory_guide.html`
189
+ - Benchmark report: `reports/warm_memory_benchmark.md`
190
+ - README visual: `docs/warm_memory_architecture.svg`
191
+
192
+ The HTML guide explains:
193
+
194
+ - how the architecture works,
195
+ - where latency is saved,
196
+ - how to use the package,
197
+ - and how the components fit together.
198
+
199
+ ## Architecture Preview
200
+
201
+ ![WarmMemory Architecture](docs/warm_memory_architecture.svg)
202
+
203
+ For a richer visual walkthrough, open `docs/warm_memory_guide.html` locally or publish it with GitHub Pages.
204
+
205
+ ## Development
206
+
207
+ Run tests:
208
+
209
+ ```bash
210
+ python3 -m unittest discover -s tests -v
211
+ ```
212
+
213
+ ## LangGraph Integration
214
+
215
+ WarmMemory ships an optional `warm_memory.langgraph` module that plugs directly
216
+ into the LangGraph ecosystem. Install the extra:
217
+
218
+ ```bash
219
+ python3 -m pip install -e ".[langgraph]"
220
+ ```
221
+
222
+ ### Drop-in `BaseStore`
223
+
224
+ `WarmStore` implements LangGraph's `BaseStore` interface with **per-namespace
225
+ warm buffers** — each namespace gets its own bounded buffer, so multi-tenant
226
+ agents don't evict each other's memory.
227
+
228
+ ```python
229
+ from warm_memory.langgraph import WarmStore
230
+
231
+ store = WarmStore(capacity=16)
232
+ store.put(("alice",), "preferences", {"text": "wants concise answers"})
233
+ store.put(("alice",), "billing", {"text": "invoice overdue", "topic": "billing"})
234
+
235
+ # query-based recall (keyword scorer by default)
236
+ hits = store.search(("alice",), query="how do I pay my invoice?")
237
+
238
+ # filter operators: $eq, $ne, $gt, $gte, $lt, $lte
239
+ billing = store.search(("alice",), filter={"topic": "billing"})
240
+ ```
241
+
242
+ ### Bring-your-own embeddings
243
+
244
+ Swap the default keyword scorer for any LangChain `Embeddings`:
245
+
246
+ ```python
247
+ from langchain_openai import OpenAIEmbeddings
248
+ from warm_memory.langgraph import EmbeddingsImportanceScorer, WarmStore
249
+
250
+ scorer = EmbeddingsImportanceScorer(OpenAIEmbeddings())
251
+ store = WarmStore(scorer=scorer)
252
+ ```
253
+
254
+ Works with any LangChain embeddings provider — OpenAI, HuggingFace, Voyage,
255
+ Cohere — or `DeterministicFakeEmbedding` for tests.
256
+
257
+ ### Pre-built agent
258
+
259
+ `build_warm_memory_agent` returns a compiled LangGraph that reads warm memory
260
+ before responding and writes the new exchange back on the way out:
261
+
262
+ ```python
263
+ from warm_memory.langgraph import WarmStore, build_warm_memory_agent
264
+
265
+ store = WarmStore(capacity=8)
266
+ agent = build_warm_memory_agent(model=my_chat_model, store=store)
267
+ agent.invoke({"query": "Where's my invoice?", "namespace": ("alice",)})
268
+ ```
269
+
270
+ A runnable example using `FakeListChatModel` (no API keys) lives at
271
+ `examples/langgraph_warm_agent.py`.
272
+
273
+ ### Comparative benchmark
274
+
275
+ `scripts/run_langgraph_benchmark.py` compares three retrieval strategies through
276
+ the LangGraph store API:
277
+
278
+ - `full-history`: every prior turn in the prompt (naive baseline)
279
+ - `vector-only`: LangGraph's `InMemoryStore` with an embedding index
280
+ - `warm-fallback`: `WarmStore` in front of the vector store
281
+
282
+ ```bash
283
+ python3 scripts/run_langgraph_benchmark.py
284
+ ```
285
+
286
+ This writes `reports/warm_memory_langgraph_benchmark.md`. The benchmark runs with synthetic
287
+ embeddings by default; set `WARM_BENCH_EMBEDDINGS=openai` (and `OPENAI_API_KEY`)
288
+ to compare against real semantic search.
289
+
290
+ ## Roadmap
291
+
292
+ - ~~add an embedding-based or reranker-based importance scorer~~ (done via
293
+ `EmbeddingsImportanceScorer`)
294
+ - ~~compare against vector-store-first baselines~~ (done via
295
+ `warm-fallback` strategy in the LangGraph benchmark)
296
+ - benchmark against real agent traces instead of only synthetic workloads
297
+ - record actual model latency and token usage from a live LLM pipeline
298
+ - add charts and experiment summaries for publication-style reporting
299
+ - TTL support for the LangGraph `BaseStore`
300
+ - publish `warm-memory` to PyPI and propose inclusion in LangGraph's third-party
301
+ store list
302
+
303
+ ## License
304
+
305
+ This project is released under the MIT License. See `LICENSE`.
306
+
@@ -0,0 +1,272 @@
1
+ # WarmMemory
2
+
3
+ [![CI](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml/badge.svg)](https://github.com/vsingh45/WarmMemory/actions/workflows/ci.yml)
4
+ [![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13-blue)](https://www.python.org/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+ [![LangGraph](https://img.shields.io/badge/LangGraph-BaseStore-1f6feb)](https://langchain-ai.github.io/langgraph/)
7
+
8
+ WarmMemory is a Python package for short-term memory management in LLM agents.
9
+ It adds a small in-process working-memory layer that keeps the most recent or most
10
+ relevant interactions close to the agent, reducing repeated retrieval work and
11
+ helping control prompt growth.
12
+
13
+ The repository provides:
14
+
15
+ - a reusable Python package for warm-memory buffering,
16
+ - a decorator for automatic interaction capture,
17
+ - a pluggable importance scoring interface,
18
+ - a deterministic benchmark for recency vs relevance vs fallback memory policies,
19
+ - a LangGraph `BaseStore` integration with per-namespace eviction, embeddings-based
20
+ ranking, and a pre-built agent,
21
+ - HTML documentation for architecture and usage.
22
+
23
+ ## Why This Exists
24
+
25
+ Many agent systems use one of two expensive patterns:
26
+
27
+ - they keep appending conversation history to the prompt,
28
+ - or they query long-term memory on nearly every turn.
29
+
30
+ Both increase latency and cost. WarmMemory introduces a hot path:
31
+
32
+ - keep a small working set in RAM,
33
+ - retrieve from that working set first,
34
+ - fall back to longer-term retrieval only when needed,
35
+ - and send only a compact context window to the model.
36
+
37
+ ## Core Ideas
38
+
39
+ ### 1. Sliding-Window Memory
40
+
41
+ The system can keep the last `N` interactions using `recent(k)`.
42
+
43
+ ### 2. Relevance-Aware Memory
44
+
45
+ Instead of only keeping the latest messages, the system can rank rows against the
46
+ current query using `relevant(query, k)` and compact the active working set with
47
+ `retain_relevant(query, k)`.
48
+
49
+ ### 3. Automatic Agent Capture
50
+
51
+ The `@remember_interaction` decorator records agent inputs and outputs without forcing
52
+ changes into the core agent logic.
53
+
54
+ ### 4. Two-Tier Memory Architecture
55
+
56
+ The benchmark models a practical split:
57
+
58
+ - warm memory for fast in-process access,
59
+ - long-term memory for slower fallback retrieval.
60
+
61
+ ## Repository Layout
62
+
63
+ - `warm_memory/`: package source code
64
+ - `warm_memory/buffer.py`: Pandas-backed warm-memory store
65
+ - `warm_memory/scoring.py`: scoring interface and default heuristic scorer
66
+ - `warm_memory/decorators.py`: function decorator for interaction capture
67
+ - `warm_memory/benchmark.py`: deterministic benchmark harness
68
+ - `warm_memory/workload.py`: synthetic workload for evaluation
69
+ - `warm_memory/langgraph/`: LangGraph integration (optional extra)
70
+ - `store.py`: `WarmStore(BaseStore)` with per-namespace eviction
71
+ - `embeddings.py`: bring-your-own embeddings scorer
72
+ - `agent.py`: pre-built `build_warm_memory_agent` graph
73
+ - `benchmark.py`: full-history vs vector-only vs warm-fallback benchmark
74
+ - `examples/langgraph_warm_agent.py`: runnable LangGraph agent example
75
+ - `scripts/run_benchmark.py`: legacy benchmark entrypoint
76
+ - `scripts/run_langgraph_benchmark.py`: LangGraph-based benchmark entrypoint
77
+ - `reports/warm_memory_benchmark.md`: legacy benchmark output
78
+ - `reports/warm_memory_langgraph_benchmark.md`: LangGraph benchmark output
79
+ - `docs/warm_memory_guide.html`: public-facing HTML documentation
80
+ - `tests/`: unit tests
81
+
82
+ ## Installation
83
+
84
+ ```bash
85
+ python3 -m pip install -e .
86
+ ```
87
+
88
+ ## Quick Start
89
+
90
+ ```python
91
+ from warm_memory import WarmMemoryBuffer, remember_interaction
92
+
93
+ memory = WarmMemoryBuffer(capacity=8)
94
+
95
+ @remember_interaction(memory)
96
+ def agent(prompt: str) -> str:
97
+ if "billing" in prompt.lower():
98
+ return "Your invoice is available in the billing portal."
99
+ return f"Echo: {prompt}"
100
+
101
+ agent("How do I reset my password?")
102
+ agent("Where is my billing invoice?")
103
+
104
+ recent_rows = memory.recent(4)
105
+ relevant_rows = memory.relevant("invoice", limit=2)
106
+ memory.retain_relevant("invoice", limit=4)
107
+ ```
108
+
109
+ ## Example Usage Pattern
110
+
111
+ Use WarmMemory in front of a larger memory system:
112
+
113
+ 1. Receive a new user query.
114
+ 2. Search the warm buffer first.
115
+ 3. If warm memory is sufficient, build a compact prompt from those rows.
116
+ 4. If warm memory is weak, fall back to long-term retrieval.
117
+ 5. Write the new interaction back into warm memory.
118
+
119
+ This pattern is useful for:
120
+
121
+ - coding agents,
122
+ - research assistants,
123
+ - task-oriented copilots,
124
+ - customer support agents,
125
+ - and any multi-turn system with repeated local context.
126
+
127
+ ## Benchmark
128
+
129
+ The repository includes a deterministic benchmark that compares:
130
+
131
+ - `recency`: always use the latest warm-memory rows,
132
+ - `relevance`: rank and retain the top relevant warm-memory rows,
133
+ - `fallback`: use warm relevance first, then long-term retrieval on misses.
134
+
135
+ Run it with:
136
+
137
+ ```bash
138
+ python3 scripts/run_benchmark.py
139
+ ```
140
+
141
+ This writes a report to `reports/warm_memory_benchmark.md`.
142
+
143
+ On the current synthetic workload, the tradeoff looks like this:
144
+
145
+ - `recency` is the fastest policy,
146
+ - `fallback` is the most accurate policy,
147
+ - `relevance` sits between the two and provides a cleaner hot working set.
148
+
149
+ The benchmark is designed to surface that tradeoff rather than name a single
150
+ winner: each policy occupies a different point on the latency-accuracy curve.
151
+
152
+ ## Documentation
153
+
154
+ - HTML guide: `docs/warm_memory_guide.html`
155
+ - Benchmark report: `reports/warm_memory_benchmark.md`
156
+ - README visual: `docs/warm_memory_architecture.svg`
157
+
158
+ The HTML guide explains:
159
+
160
+ - how the architecture works,
161
+ - where latency is saved,
162
+ - how to use the package,
163
+ - and how the components fit together.
164
+
165
+ ## Architecture Preview
166
+
167
+ ![WarmMemory Architecture](docs/warm_memory_architecture.svg)
168
+
169
+ For a richer visual walkthrough, open `docs/warm_memory_guide.html` locally or publish it with GitHub Pages.
170
+
171
+ ## Development
172
+
173
+ Run tests:
174
+
175
+ ```bash
176
+ python3 -m unittest discover -s tests -v
177
+ ```
178
+
179
+ ## LangGraph Integration
180
+
181
+ WarmMemory ships an optional `warm_memory.langgraph` module that plugs directly
182
+ into the LangGraph ecosystem. Install the extra:
183
+
184
+ ```bash
185
+ python3 -m pip install -e ".[langgraph]"
186
+ ```
187
+
188
+ ### Drop-in `BaseStore`
189
+
190
+ `WarmStore` implements LangGraph's `BaseStore` interface with **per-namespace
191
+ warm buffers** — each namespace gets its own bounded buffer, so multi-tenant
192
+ agents don't evict each other's memory.
193
+
194
+ ```python
195
+ from warm_memory.langgraph import WarmStore
196
+
197
+ store = WarmStore(capacity=16)
198
+ store.put(("alice",), "preferences", {"text": "wants concise answers"})
199
+ store.put(("alice",), "billing", {"text": "invoice overdue", "topic": "billing"})
200
+
201
+ # query-based recall (keyword scorer by default)
202
+ hits = store.search(("alice",), query="how do I pay my invoice?")
203
+
204
+ # filter operators: $eq, $ne, $gt, $gte, $lt, $lte
205
+ billing = store.search(("alice",), filter={"topic": "billing"})
206
+ ```
207
+
208
+ ### Bring-your-own embeddings
209
+
210
+ Swap the default keyword scorer for any LangChain `Embeddings`:
211
+
212
+ ```python
213
+ from langchain_openai import OpenAIEmbeddings
214
+ from warm_memory.langgraph import EmbeddingsImportanceScorer, WarmStore
215
+
216
+ scorer = EmbeddingsImportanceScorer(OpenAIEmbeddings())
217
+ store = WarmStore(scorer=scorer)
218
+ ```
219
+
220
+ Works with any LangChain embeddings provider — OpenAI, HuggingFace, Voyage,
221
+ Cohere — or `DeterministicFakeEmbedding` for tests.
222
+
223
+ ### Pre-built agent
224
+
225
+ `build_warm_memory_agent` returns a compiled LangGraph that reads warm memory
226
+ before responding and writes the new exchange back on the way out:
227
+
228
+ ```python
229
+ from warm_memory.langgraph import WarmStore, build_warm_memory_agent
230
+
231
+ store = WarmStore(capacity=8)
232
+ agent = build_warm_memory_agent(model=my_chat_model, store=store)
233
+ agent.invoke({"query": "Where's my invoice?", "namespace": ("alice",)})
234
+ ```
235
+
236
+ A runnable example using `FakeListChatModel` (no API keys) lives at
237
+ `examples/langgraph_warm_agent.py`.
238
+
239
+ ### Comparative benchmark
240
+
241
+ `scripts/run_langgraph_benchmark.py` compares three retrieval strategies through
242
+ the LangGraph store API:
243
+
244
+ - `full-history`: every prior turn in the prompt (naive baseline)
245
+ - `vector-only`: LangGraph's `InMemoryStore` with an embedding index
246
+ - `warm-fallback`: `WarmStore` in front of the vector store
247
+
248
+ ```bash
249
+ python3 scripts/run_langgraph_benchmark.py
250
+ ```
251
+
252
+ This writes `reports/warm_memory_langgraph_benchmark.md`. Run it with synthetic
253
+ embeddings by default; set `WARM_BENCH_EMBEDDINGS=openai` (and `OPENAI_API_KEY`)
254
+ to compare against real semantic search.
255
+
256
+ ## Roadmap
257
+
258
+ - ~~add an embedding-based or reranker-based importance scorer~~ (done via
259
+ `EmbeddingsImportanceScorer`)
260
+ - ~~compare against vector-store-first baselines~~ (done via
261
+ `warm-fallback` strategy in the LangGraph benchmark)
262
+ - benchmark against real agent traces instead of only synthetic workloads
263
+ - record actual model latency and token usage from a live LLM pipeline
264
+ - add charts and experiment summaries for publication-style reporting
265
+ - TTL support for the LangGraph `BaseStore`
266
+ - publish `warm-memory` to PyPI and propose inclusion in LangGraph's third-party
267
+ store list
268
+
269
+ ## License
270
+
271
+ This project is released under the MIT License. See `LICENSE`.
272
+
@@ -0,0 +1,62 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "warm-memory"
7
+ version = "0.2.1"
8
+ description = "Capacity-bounded warm memory for LLM agents, with a LangGraph BaseStore implementation, embeddings-based importance scoring, and a comparative benchmark."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "Vivek Singh" },
14
+ ]
15
+ keywords = [
16
+ "llm",
17
+ "agent",
18
+ "memory",
19
+ "langgraph",
20
+ "langchain",
21
+ "basestore",
22
+ "retrieval",
23
+ "rag",
24
+ "vector-store",
25
+ "short-term-memory",
26
+ "benchmark",
27
+ ]
28
+ classifiers = [
29
+ "Development Status :: 4 - Beta",
30
+ "Intended Audience :: Developers",
31
+ "Intended Audience :: Science/Research",
32
+ "Operating System :: OS Independent",
33
+ "Programming Language :: Python :: 3",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Programming Language :: Python :: 3.13",
37
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
38
+ "Topic :: Software Development :: Libraries",
39
+ "Typing :: Typed",
40
+ ]
41
+ dependencies = [
42
+ "pandas>=2.2.0,<3.0",
43
+ ]
44
+
45
+ [project.optional-dependencies]
46
+ langgraph = [
47
+ "langgraph>=1.0,<2.0",
48
+ "langchain-core>=1.0,<2.0",
49
+ ]
50
+ dev = [
51
+ "build>=1.2",
52
+ "twine>=5.0",
53
+ ]
54
+
55
+ [project.urls]
56
+ Homepage = "https://github.com/vsingh45/WarmMemory"
57
+ Repository = "https://github.com/vsingh45/WarmMemory"
58
+ Issues = "https://github.com/vsingh45/WarmMemory/issues"
59
+ Changelog = "https://github.com/vsingh45/WarmMemory/blob/main/CHANGELOG.md"
60
+
61
+ [tool.setuptools.packages.find]
62
+ include = ["warm_memory*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,28 @@
1
+ import tempfile
2
+ import unittest
3
+ from pathlib import Path
4
+
5
+ from warm_memory.benchmark import BenchmarkConfig, run_benchmark
6
+
7
+
8
+ # Smoke test for the benchmark harness: exercises every memory policy once
+ # and verifies that a markdown report is written to disk.
+ class BenchmarkTests(unittest.TestCase):
10
+ def test_benchmark_runs_all_strategies_and_writes_report(self) -> None:
11
+ # Use a throwaway directory so the test leaves no report file behind.
+ with tempfile.TemporaryDirectory() as tmpdir:
12
+ report_path = Path(tmpdir) / "report.md"
13
+ # Small capacity/top_k/long_term_limit keep the synthetic workload fast for CI.
+ results = run_benchmark(
14
+ config=BenchmarkConfig(capacity=6, top_k=4, long_term_limit=6),
15
+ report_path=report_path,
16
+ )
17
+
18
+ # All three memory policies must appear in the results mapping,
+ # and the markdown report must exist with its expected title.
+ self.assertEqual(set(results), {"recency", "relevance", "fallback"})
19
+ self.assertTrue(report_path.exists())
20
+ self.assertIn("WarmMemory Benchmark Report", report_path.read_text(encoding="utf-8"))
21
+
22
+ # Every per-policy result carries a non-empty turn log plus the
+ # headline latency and accuracy metrics in its summary.
+ for result in results.values():
23
+ self.assertGreater(len(result.turn_log), 0)
24
+ self.assertIn("avg_end_to_end_ms", result.summary)
25
+ self.assertIn("answer_accuracy", result.summary)
26
+
27
+ # Allow running this test module directly: `python test_benchmark.py`.
+ if __name__ == "__main__":
28
+ unittest.main()