wavemind 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. wavemind-2.0.2/PKG-INFO +242 -0
  2. wavemind-2.0.2/README.md +211 -0
  3. {wavemind-2.0.0 → wavemind-2.0.2}/pyproject.toml +7 -1
  4. wavemind-2.0.2/tests/test_agent_memory_benchmark.py +74 -0
  5. wavemind-2.0.2/tests/test_langchain_integration.py +86 -0
  6. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_packaging_files.py +14 -0
  7. wavemind-2.0.2/wavemind/integrations/__init__.py +2 -0
  8. wavemind-2.0.2/wavemind/integrations/langchain.py +146 -0
  9. wavemind-2.0.2/wavemind.egg-info/PKG-INFO +242 -0
  10. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind.egg-info/SOURCES.txt +5 -1
  11. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind.egg-info/requires.txt +6 -0
  12. wavemind-2.0.0/PKG-INFO +0 -134
  13. wavemind-2.0.0/README.md +0 -107
  14. wavemind-2.0.0/wavemind.egg-info/PKG-INFO +0 -134
  15. {wavemind-2.0.0 → wavemind-2.0.2}/LICENSE +0 -0
  16. {wavemind-2.0.0 → wavemind-2.0.2}/setup.cfg +0 -0
  17. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_api.py +0 -0
  18. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_api_process_persistence.py +0 -0
  19. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_cli_smoke.py +0 -0
  20. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_core_persistence.py +0 -0
  21. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_examples.py +0 -0
  22. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_import_benchmark.py +0 -0
  23. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_indexes_encoders.py +0 -0
  24. {wavemind-2.0.0 → wavemind-2.0.2}/tests/test_semantic_and_latency.py +0 -0
  25. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/__init__.py +0 -0
  26. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/__main__.py +0 -0
  27. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/api.py +0 -0
  28. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/benchmark.py +0 -0
  29. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/cli.py +0 -0
  30. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/core.py +0 -0
  31. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/encoders.py +0 -0
  32. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/importers.py +0 -0
  33. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/indexes.py +0 -0
  34. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind/storage.py +0 -0
  35. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind.egg-info/dependency_links.txt +0 -0
  36. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind.egg-info/entry_points.txt +0 -0
  37. {wavemind-2.0.0 → wavemind-2.0.2}/wavemind.egg-info/top_level.txt +0 -0
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: wavemind
3
+ Version: 2.0.2
4
+ Summary: Persistent dynamic memory engine with vector search and wave-field re-ranking
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://github.com/CaspianG/wavemind
7
+ Project-URL: Repository, https://github.com/CaspianG/wavemind
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: numpy>=1.24
12
+ Requires-Dist: fastapi>=0.110
13
+ Requires-Dist: uvicorn[standard]>=0.27
14
+ Requires-Dist: pydantic>=2
15
+ Requires-Dist: pypdf>=4
16
+ Provides-Extra: sentence
17
+ Requires-Dist: sentence-transformers>=3; extra == "sentence"
18
+ Provides-Extra: ml
19
+ Requires-Dist: sentence-transformers>=3; extra == "ml"
20
+ Provides-Extra: indexes
21
+ Requires-Dist: annoy>=1.17; extra == "indexes"
22
+ Requires-Dist: faiss-cpu>=1.8; platform_system != "Windows" and extra == "indexes"
23
+ Provides-Extra: bench
24
+ Requires-Dist: chromadb>=1.0; extra == "bench"
25
+ Provides-Extra: langchain
26
+ Requires-Dist: langchain-classic>=1.0; extra == "langchain"
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=8; extra == "dev"
29
+ Requires-Dist: httpx>=0.27; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # WaveMind is persistent dynamic memory for AI agents: vector search first, wave-field priority second, SQLite as the source of truth.
33
+
34
+ ![Python](https://img.shields.io/badge/python-3.10%2B-blue)
35
+ [![Tests](https://github.com/CaspianG/wavemind/actions/workflows/tests.yml/badge.svg)](https://github.com/CaspianG/wavemind/actions/workflows/tests.yml)
36
+ ![License](https://img.shields.io/badge/license-MIT-green)
37
+
38
+ ## Terminal Demo
39
+
40
+ From a cloned repository:
41
+
42
+ ```text
43
+ $ python examples/demo.py
44
+ ✓ Remembered: "Andrey is a trader who tracks market breakouts."
45
+ ✓ Remembered: "Andrey prefers short practical answers about AI agents."
46
+
47
+ Query: "Andrey trader agent"
48
+ → Result 1 (0.54): "Andrey is a trader who tracks market breakouts."
49
+ → Result 2 (0.30): "Andrey prefers short practical answers about AI agents."
50
+ ```
51
+
52
+ The demo is offline, keyless, and uses the built-in hash encoder.
53
+
54
+ ## Quick Start
55
+
56
+ Install from PyPI and create your first local memory:
57
+
58
+ ```sh
59
+ python -m pip install wavemind
60
+ wavemind remember "Andrey is a trader" --namespace demo
61
+ wavemind query "trader" --namespace demo
62
+ ```
63
+
64
+ What happens here:
65
+
66
+ - `remember` writes the text and its vector pattern into a local SQLite database.
67
+ - By default, the database file is `wavemind.sqlite3` in your current working directory.
68
+ - `--namespace demo` keeps this memory separate from other users, agents, or projects.
69
+ - `query` reads from the same SQLite file and returns the closest remembered texts.
70
+
71
+ ## Optional Embeddings
72
+
73
+ For sentence-transformer embeddings:
74
+
75
+ ```sh
76
+ python -m pip install "wavemind[sentence]"
77
+ wavemind --encoder sentence remember "Andrey is a trader" --namespace demo
78
+ wavemind --encoder sentence query "What does Andrey do?" --namespace demo
79
+ ```
80
+
81
+ ## Data Location
82
+
83
+ For an explicit database path, put global options before the command:
84
+
85
+ ```sh
86
+ wavemind --db ./agent_memory.sqlite3 remember "Andrey is a trader" --namespace demo
87
+ wavemind --db ./agent_memory.sqlite3 query "trader" --namespace demo
88
+ ```
89
+
90
+ ## Install From Source
91
+
92
+ For contributors installing from a local clone:
93
+
94
+ ```sh
95
+ git clone https://github.com/CaspianG/wavemind.git
96
+ cd wavemind
97
+ python -m pip install -e ".[sentence]"
98
+ ```
99
+
100
+ One-file setup scripts are also included in the repository:
101
+
102
+ ```sh
103
+ sh install.sh
104
+ ```
105
+
106
+ ```bat
107
+ install.bat
108
+ ```
109
+
110
+ ## LangChain Memory
111
+
112
+ Install the optional integration:
113
+
114
+ ```sh
115
+ pip install "wavemind[langchain]"
116
+ ```
117
+
118
+ Use WaveMind as a drop-in LangChain memory object:
119
+
120
+ ```python
121
+ from wavemind.integrations.langchain import WaveMindMemory
122
+
123
+ memory = WaveMindMemory(db_path="agent_memory.sqlite3")
124
+ # Drop-in replacement for: memory = ConversationBufferMemory()
125
+ ```
126
+
127
+ Offline runnable example from a cloned repository:
128
+
129
+ ```sh
130
+ python examples/langchain_memory.py
131
+ ```
132
+
133
+ ## Why Dynamic Memory
134
+
135
+ WaveMind is not positioned as "a faster Chroma." Chroma, Qdrant, Pinecone, and Weaviate are vector databases: they store embeddings and return nearest neighbors. That is the right tool for many static RAG workloads.
136
+
137
+ WaveMind is an agent memory layer. It still uses vector search first, but then applies memory-specific signals that a plain vector store does not model by default:
138
+
139
+ | memory behavior | Why it matters for agents | WaveMind mechanism |
140
+ |---|---|---|
141
+ | Hot memories | Facts recalled repeatedly should become easier to recall again. | Wave-field hotness and priority updates. |
142
+ | Aging memories | Old low-value facts should fade instead of competing forever. | TTL and decay-aware scoring. |
143
+ | Scoped memory | One user, agent, workspace, or project should not leak into another. | Namespaces and tags. |
144
+ | Explicit forgetting | Agents need deletion, privacy cleanup, and correction workflows. | `forget()` plus SQLite persistence. |
145
+ | Stable restart behavior | A memory system must survive process restarts. | SQLite source of truth, reloadable indexes. |
146
+ | Vector plus memory rank | Semantic similarity is necessary but not sufficient for long-running agents. | k-NN candidates first, wave field as re-ranker. |
147
+
148
+ The current Chroma benchmark below is intentionally conservative: it compares static retrieval on the same facts and the same hash embeddings. That benchmark is useful, but it does not exercise WaveMind's main product thesis: memory that changes over time as an agent recalls, reinforces, ages, and forgets information.
149
+
150
+ The benchmark that should decide whether WaveMind is worth using is a dynamic agent-memory benchmark:
151
+
152
+ | scenario | What should happen |
153
+ |---|---|
154
+ | A user repeats a preference many times. | WaveMind should rank it higher than equally similar but unused facts. |
155
+ | A fact expires via TTL. | WaveMind should suppress it without requiring manual vector cleanup. |
156
+ | A user corrects an old fact. | WaveMind should prefer the newer or reinforced memory. |
157
+ | A query is ambiguous across namespaces. | WaveMind should return only the scoped user's memory. |
158
+ | A long conversation has many irrelevant facts. | WaveMind should preserve useful recall instead of treating all vectors equally. |
159
+
160
+ In short: static vector search answers "what is nearest?" Agent memory also asks "what is still relevant, reinforced, scoped, and allowed to be remembered?"
161
+
162
+ ## Benchmark
163
+
164
+ 200 real Russian sentences from Tatoeba, 50 one-word queries, NumPy exact index.
165
+
166
+ | metric | hash | sentence-transformers |
167
+ |---|---:|---:|
168
+ | precision@1 | 1.00 | 1.00 |
169
+ | precision@3 | 1.00 | 1.00 |
170
+ | avg query | 0.49 ms | 52.84 ms |
171
+
172
+ Capacity check with the hash encoder:
173
+
174
+ | memories | precision@1 | precision@3 | avg query |
175
+ |---:|---:|---:|---:|
176
+ | 200 | 1.00 | 1.00 | 0.49 ms |
177
+ | 1000 | 0.88 | 1.00 | 1.50 ms |
178
+ | 5000 | 0.72 | 0.88 | 5.68 ms |
179
+
180
+ Run locally from a cloned repository:
181
+
182
+ ```sh
183
+ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder hash --index numpy
184
+ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder sentence --index numpy
185
+ ```
186
+
187
+ Agent-memory benchmark against Chroma:
188
+
189
+ 200 Russian user facts, 50 natural-language questions, same precomputed `HashingTextEncoder` embeddings for WaveMind and Chroma.
190
+ Full machine-readable result: `benchmarks/agent_memory_results.json`.
191
+
192
+ This is a static retrieval benchmark. It measures baseline ranking and latency, not hotness, TTL, repeated recall, or memory aging.
193
+
194
+ | engine | precision@1 | precision@3 | avg latency |
195
+ |---|---:|---:|---:|
196
+ | WaveMind | 0.82 | 0.90 | 2.25 ms |
197
+ | Chroma | 0.82 | 0.88 | 0.93 ms |
198
+
199
+ Run locally from a cloned repository:
200
+
201
+ ```sh
202
+ pip install -e ".[bench]"
203
+ python benchmarks/agent_memory_benchmark.py --engines wavemind chroma --facts 200 --queries 50
204
+ ```
205
+
206
+ ## Comparison
207
+
208
+ | feature | WaveMind | Chroma | Qdrant |
209
+ |---|---|---|---|
210
+ | Primary role | Agent memory engine | Embedding database | Production vector database |
211
+ | Local SQLite persistence | Yes | Yes | No, separate service/storage |
212
+ | HTTP API | FastAPI included | Included | Included |
213
+ | Dynamic memory priority | Wave-field hotness, TTL, priority | Metadata/filter driven | Payload/filter driven |
214
+ | Built-in forgetting | TTL and explicit forget | Manual delete/filtering | Manual delete/filtering |
215
+ | Best fit | Small to medium agent memory with dynamic recall | Local RAG apps and prototypes | Large-scale vector search |
216
+ | Scale target today | Up to 1000 records optimal on NumPy; FAISS recommended beyond 5000 records | Larger than WaveMind local mode | Production scale |
217
+
218
+ WaveMind is not trying to replace dedicated vector databases at scale. The intended product gap is dynamic priority: frequently used memories can become hotter while old or low-priority memories fade. For static RAG over large document collections, use a mature vector database. For agent memory that needs persistence, scoped recall, TTL, forgetting, and reinforcement, WaveMind is designed to sit above or beside the vector index.
219
+
220
+ ## Known Limitations
221
+
222
+ - Optimal capacity on the current NumPy exact index is up to 1000 records.
223
+ - At 5000 records, one-word `precision@1` is currently 0.72 with the hash encoder; many misses are ambiguous queries where another sentence containing the same word ranks first.
224
+ - For `N > 5000`, use the FAISS backend with `--index faiss` or another production vector index.
225
+ - `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` requires about 420 MB of model files and measured about 53 ms per query on the benchmark machine.
226
+ - The Chroma comparison currently uses shared precomputed hash embeddings to isolate retrieval/ranking behavior; semantic model comparisons should be run separately.
227
+ - In the 200-fact agent benchmark, Chroma is faster on average while WaveMind is slightly higher at `precision@3`.
228
+ - The current public benchmark does not yet prove the dynamic-memory advantage. The next benchmark must test hotness, TTL, corrections, namespace isolation, and repeated recall.
229
+
230
+ ## Roadmap
231
+
232
+ - FAISS-first production index path with persisted index rebuilds.
233
+ - Dynamic agent-memory benchmark against Chroma/Qdrant: hotness, TTL, stale-fact suppression, corrections, and namespace isolation.
234
+ - Expand the agent-memory benchmark to sentence-transformers, FAISS, Chroma default embeddings, and Qdrant.
235
+ - Better semantic query expansion for short and ambiguous queries.
236
+ - Namespace quotas, backups, and daemon hardening for SaaS use.
237
+ - Webhook on recall for agent runtimes.
238
+ - OHLCV pattern-memory experiments for market research and backtests.
239
+
240
+ ## License
241
+
242
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,211 @@
1
+ # WaveMind is persistent dynamic memory for AI agents: vector search first, wave-field priority second, SQLite as the source of truth.
2
+
3
+ ![Python](https://img.shields.io/badge/python-3.10%2B-blue)
4
+ [![Tests](https://github.com/CaspianG/wavemind/actions/workflows/tests.yml/badge.svg)](https://github.com/CaspianG/wavemind/actions/workflows/tests.yml)
5
+ ![License](https://img.shields.io/badge/license-MIT-green)
6
+
7
+ ## Terminal Demo
8
+
9
+ From a cloned repository:
10
+
11
+ ```text
12
+ $ python examples/demo.py
13
+ ✓ Remembered: "Andrey is a trader who tracks market breakouts."
14
+ ✓ Remembered: "Andrey prefers short practical answers about AI agents."
15
+
16
+ Query: "Andrey trader agent"
17
+ → Result 1 (0.54): "Andrey is a trader who tracks market breakouts."
18
+ → Result 2 (0.30): "Andrey prefers short practical answers about AI agents."
19
+ ```
20
+
21
+ The demo is offline, keyless, and uses the built-in hash encoder.
22
+
23
+ ## Quick Start
24
+
25
+ Install from PyPI and create your first local memory:
26
+
27
+ ```sh
28
+ python -m pip install wavemind
29
+ wavemind remember "Andrey is a trader" --namespace demo
30
+ wavemind query "trader" --namespace demo
31
+ ```
32
+
33
+ What happens here:
34
+
35
+ - `remember` writes the text and its vector pattern into a local SQLite database.
36
+ - By default, the database file is `wavemind.sqlite3` in your current working directory.
37
+ - `--namespace demo` keeps this memory separate from other users, agents, or projects.
38
+ - `query` reads from the same SQLite file and returns the closest remembered texts.
39
+
40
+ ## Optional Embeddings
41
+
42
+ For sentence-transformer embeddings:
43
+
44
+ ```sh
45
+ python -m pip install "wavemind[sentence]"
46
+ wavemind --encoder sentence remember "Andrey is a trader" --namespace demo
47
+ wavemind --encoder sentence query "What does Andrey do?" --namespace demo
48
+ ```
49
+
50
+ ## Data Location
51
+
52
+ For an explicit database path, put global options before the command:
53
+
54
+ ```sh
55
+ wavemind --db ./agent_memory.sqlite3 remember "Andrey is a trader" --namespace demo
56
+ wavemind --db ./agent_memory.sqlite3 query "trader" --namespace demo
57
+ ```
58
+
59
+ ## Install From Source
60
+
61
+ For contributors installing from a local clone:
62
+
63
+ ```sh
64
+ git clone https://github.com/CaspianG/wavemind.git
65
+ cd wavemind
66
+ python -m pip install -e ".[sentence]"
67
+ ```
68
+
69
+ One-file setup scripts are also included in the repository:
70
+
71
+ ```sh
72
+ sh install.sh
73
+ ```
74
+
75
+ ```bat
76
+ install.bat
77
+ ```
78
+
79
+ ## LangChain Memory
80
+
81
+ Install the optional integration:
82
+
83
+ ```sh
84
+ pip install "wavemind[langchain]"
85
+ ```
86
+
87
+ Use WaveMind as a drop-in LangChain memory object:
88
+
89
+ ```python
90
+ from wavemind.integrations.langchain import WaveMindMemory
91
+
92
+ memory = WaveMindMemory(db_path="agent_memory.sqlite3")
93
+ # Drop-in replacement for: memory = ConversationBufferMemory()
94
+ ```
95
+
96
+ Offline runnable example from a cloned repository:
97
+
98
+ ```sh
99
+ python examples/langchain_memory.py
100
+ ```
101
+
102
+ ## Why Dynamic Memory
103
+
104
+ WaveMind is not positioned as "a faster Chroma." Chroma, Qdrant, Pinecone, and Weaviate are vector databases: they store embeddings and return nearest neighbors. That is the right tool for many static RAG workloads.
105
+
106
+ WaveMind is an agent memory layer. It still uses vector search first, but then applies memory-specific signals that a plain vector store does not model by default:
107
+
108
+ | memory behavior | Why it matters for agents | WaveMind mechanism |
109
+ |---|---|---|
110
+ | Hot memories | Facts recalled repeatedly should become easier to recall again. | Wave-field hotness and priority updates. |
111
+ | Aging memories | Old low-value facts should fade instead of competing forever. | TTL and decay-aware scoring. |
112
+ | Scoped memory | One user, agent, workspace, or project should not leak into another. | Namespaces and tags. |
113
+ | Explicit forgetting | Agents need deletion, privacy cleanup, and correction workflows. | `forget()` plus SQLite persistence. |
114
+ | Stable restart behavior | A memory system must survive process restarts. | SQLite source of truth, reloadable indexes. |
115
+ | Vector plus memory rank | Semantic similarity is necessary but not sufficient for long-running agents. | k-NN candidates first, wave field as re-ranker. |
116
+
117
+ The current Chroma benchmark below is intentionally conservative: it compares static retrieval on the same facts and the same hash embeddings. That benchmark is useful, but it does not exercise WaveMind's main product thesis: memory that changes over time as an agent recalls, reinforces, ages, and forgets information.
118
+
119
+ The benchmark that should decide whether WaveMind is worth using is a dynamic agent-memory benchmark:
120
+
121
+ | scenario | What should happen |
122
+ |---|---|
123
+ | A user repeats a preference many times. | WaveMind should rank it higher than equally similar but unused facts. |
124
+ | A fact expires via TTL. | WaveMind should suppress it without requiring manual vector cleanup. |
125
+ | A user corrects an old fact. | WaveMind should prefer the newer or reinforced memory. |
126
+ | A query is ambiguous across namespaces. | WaveMind should return only the scoped user's memory. |
127
+ | A long conversation has many irrelevant facts. | WaveMind should preserve useful recall instead of treating all vectors equally. |
128
+
129
+ In short: static vector search answers "what is nearest?" Agent memory also asks "what is still relevant, reinforced, scoped, and allowed to be remembered?"
130
+
131
+ ## Benchmark
132
+
133
+ 200 real Russian sentences from Tatoeba, 50 one-word queries, NumPy exact index.
134
+
135
+ | metric | hash | sentence-transformers |
136
+ |---|---:|---:|
137
+ | precision@1 | 1.00 | 1.00 |
138
+ | precision@3 | 1.00 | 1.00 |
139
+ | avg query | 0.49 ms | 52.84 ms |
140
+
141
+ Capacity check with the hash encoder:
142
+
143
+ | memories | precision@1 | precision@3 | avg query |
144
+ |---:|---:|---:|---:|
145
+ | 200 | 1.00 | 1.00 | 0.49 ms |
146
+ | 1000 | 0.88 | 1.00 | 1.50 ms |
147
+ | 5000 | 0.72 | 0.88 | 5.68 ms |
148
+
149
+ Run locally from a cloned repository:
150
+
151
+ ```sh
152
+ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder hash --index numpy
153
+ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder sentence --index numpy
154
+ ```
155
+
156
+ Agent-memory benchmark against Chroma:
157
+
158
+ 200 Russian user facts, 50 natural-language questions, same precomputed `HashingTextEncoder` embeddings for WaveMind and Chroma.
159
+ Full machine-readable result: `benchmarks/agent_memory_results.json`.
160
+
161
+ This is a static retrieval benchmark. It measures baseline ranking and latency, not hotness, TTL, repeated recall, or memory aging.
162
+
163
+ | engine | precision@1 | precision@3 | avg latency |
164
+ |---|---:|---:|---:|
165
+ | WaveMind | 0.82 | 0.90 | 2.25 ms |
166
+ | Chroma | 0.82 | 0.88 | 0.93 ms |
167
+
168
+ Run locally from a cloned repository:
169
+
170
+ ```sh
171
+ pip install -e ".[bench]"
172
+ python benchmarks/agent_memory_benchmark.py --engines wavemind chroma --facts 200 --queries 50
173
+ ```
174
+
175
+ ## Comparison
176
+
177
+ | feature | WaveMind | Chroma | Qdrant |
178
+ |---|---|---|---|
179
+ | Primary role | Agent memory engine | Embedding database | Production vector database |
180
+ | Local SQLite persistence | Yes | Yes | No, separate service/storage |
181
+ | HTTP API | FastAPI included | Included | Included |
182
+ | Dynamic memory priority | Wave-field hotness, TTL, priority | Metadata/filter driven | Payload/filter driven |
183
+ | Built-in forgetting | TTL and explicit forget | Manual delete/filtering | Manual delete/filtering |
184
+ | Best fit | Small to medium agent memory with dynamic recall | Local RAG apps and prototypes | Large-scale vector search |
185
+ | Scale target today | Up to 1000 records optimal on NumPy; FAISS recommended beyond 5000 records | Larger than WaveMind local mode | Production scale |
186
+
187
+ WaveMind is not trying to replace dedicated vector databases at scale. The intended product gap is dynamic priority: frequently used memories can become hotter while old or low-priority memories fade. For static RAG over large document collections, use a mature vector database. For agent memory that needs persistence, scoped recall, TTL, forgetting, and reinforcement, WaveMind is designed to sit above or beside the vector index.
188
+
189
+ ## Known Limitations
190
+
191
+ - Optimal capacity on the current NumPy exact index is up to 1000 records.
192
+ - At 5000 records, one-word `precision@1` is currently 0.72 with the hash encoder; many misses are ambiguous queries where another sentence containing the same word ranks first.
193
+ - For `N > 5000`, use the FAISS backend with `--index faiss` or another production vector index.
194
+ - `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` requires about 420 MB of model files and measured about 53 ms per query on the benchmark machine.
195
+ - The Chroma comparison currently uses shared precomputed hash embeddings to isolate retrieval/ranking behavior; semantic model comparisons should be run separately.
196
+ - In the 200-fact agent benchmark, Chroma is faster on average while WaveMind is slightly higher at `precision@3`.
197
+ - The current public benchmark does not yet prove the dynamic-memory advantage. The next benchmark must test hotness, TTL, corrections, namespace isolation, and repeated recall.
198
+
199
+ ## Roadmap
200
+
201
+ - FAISS-first production index path with persisted index rebuilds.
202
+ - Dynamic agent-memory benchmark against Chroma/Qdrant: hotness, TTL, stale-fact suppression, corrections, and namespace isolation.
203
+ - Expand the agent-memory benchmark to sentence-transformers, FAISS, Chroma default embeddings, and Qdrant.
204
+ - Better semantic query expansion for short and ambiguous queries.
205
+ - Namespace quotas, backups, and daemon hardening for SaaS use.
206
+ - Webhook on recall for agent runtimes.
207
+ - OHLCV pattern-memory experiments for market research and backtests.
208
+
209
+ ## License
210
+
211
+ MIT. See [LICENSE](LICENSE).
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "wavemind"
7
- version = "2.0.0"
7
+ version = "2.0.2"
8
8
  description = "Persistent dynamic memory engine with vector search and wave-field re-ranking"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -28,6 +28,12 @@ indexes = [
28
28
  "annoy>=1.17",
29
29
  "faiss-cpu>=1.8; platform_system != 'Windows'",
30
30
  ]
31
+ bench = [
32
+ "chromadb>=1.0",
33
+ ]
34
+ langchain = [
35
+ "langchain-classic>=1.0",
36
+ ]
31
37
  dev = [
32
38
  "pytest>=8",
33
39
  "httpx>=0.27",
@@ -0,0 +1,74 @@
1
+ import json
2
+ import os
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
+ def test_agent_memory_scenario_has_requested_shape():
9
+ from benchmarks.agent_memory_benchmark import build_agent_memory_scenario
10
+
11
+ scenario = build_agent_memory_scenario()
12
+ fact_ids = {fact.id for fact in scenario.facts}
13
+
14
+ assert len(scenario.facts) == 200
15
+ assert len(scenario.queries) == 50
16
+ assert len(fact_ids) == 200
17
+ assert all(query.expected_id in fact_ids for query in scenario.queries)
18
+ assert any("бюджет" in query.text.lower() for query in scenario.queries)
19
+ assert any("зовут" in query.text.lower() for query in scenario.queries)
20
+
21
+
22
+ def test_agent_memory_metrics_use_expected_fact_in_top_k():
23
+ from benchmarks.agent_memory_benchmark import AgentQuery, compute_metrics
24
+
25
+ queries = [
26
+ AgentQuery(id="q1", text="как зовут пользователя?", expected_id="fact_name"),
27
+ AgentQuery(id="q2", text="что знаем про бюджет?", expected_id="fact_budget"),
28
+ ]
29
+ rankings = {
30
+ "q1": ["fact_name", "fact_role", "fact_budget"],
31
+ "q2": ["fact_role", "fact_budget", "fact_name"],
32
+ }
33
+
34
+ metrics = compute_metrics(queries, rankings, [1.0, 3.0])
35
+
36
+ assert metrics.precision_at_1 == 0.5
37
+ assert metrics.precision_at_3 == 1.0
38
+ assert metrics.avg_latency_ms == 2.0
39
+
40
+
41
+ def test_agent_memory_benchmark_cli_writes_json_for_wavemind(tmp_path):
42
+ output = tmp_path / "agent-memory-result.json"
43
+ project_root = Path(__file__).resolve().parents[1]
44
+ env = os.environ.copy()
45
+ env["PYTHONPATH"] = str(project_root) + os.pathsep + env.get("PYTHONPATH", "")
46
+
47
+ subprocess.run(
48
+ [
49
+ sys.executable,
50
+ "benchmarks/agent_memory_benchmark.py",
51
+ "--engines",
52
+ "wavemind",
53
+ "--facts",
54
+ "20",
55
+ "--queries",
56
+ "5",
57
+ "--output",
58
+ str(output),
59
+ ],
60
+ cwd=project_root,
61
+ env=env,
62
+ text=True,
63
+ encoding="utf-8",
64
+ capture_output=True,
65
+ check=True,
66
+ )
67
+
68
+ payload = json.loads(output.read_text(encoding="utf-8"))
69
+
70
+ assert payload["scenario"]["facts"] == 20
71
+ assert payload["scenario"]["queries"] == 5
72
+ assert payload["results"][0]["engine"] == "WaveMind"
73
+ assert "precision_at_1" in payload["results"][0]
74
+ assert "avg_latency_ms" in payload["results"][0]
@@ -0,0 +1,86 @@
1
+ import subprocess
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from wavemind import HashingTextEncoder, WaveMind
6
+
7
+
8
+ def make_memory(tmp_path):
9
+ from wavemind.integrations.langchain import WaveMindMemory
10
+
11
+ mind = WaveMind(
12
+ db_path=tmp_path / "langchain.sqlite3",
13
+ encoder=HashingTextEncoder(vector_dim=128),
14
+ width=32,
15
+ height=32,
16
+ layers=2,
17
+ evolve_on_feed=1,
18
+ score_threshold=0.0,
19
+ )
20
+ return WaveMindMemory(memory=mind, top_k=3)
21
+
22
+
23
+ def test_wavemind_memory_exposes_langchain_base_memory_methods(tmp_path):
24
+ memory = make_memory(tmp_path)
25
+
26
+ assert memory.memory_variables == ["history"]
27
+ assert hasattr(memory, "load_memory_variables")
28
+ assert hasattr(memory, "save_context")
29
+ assert hasattr(memory, "clear")
30
+
31
+
32
+ def test_wavemind_memory_saves_context_and_recalls_relevant_history(tmp_path):
33
+ memory = make_memory(tmp_path)
34
+
35
+ memory.save_context(
36
+ {"input": "my name is Andrey and I am a trader"},
37
+ {"output": "remembered"},
38
+ )
39
+ loaded = memory.load_memory_variables({"input": "what is my name?"})
40
+
41
+ assert set(loaded) == {"history"}
42
+ assert "Andrey" in loaded["history"]
43
+ assert "trader" in loaded["history"]
44
+
45
+
46
+ def test_wavemind_memory_supports_custom_input_output_keys(tmp_path):
47
+ memory = make_memory(tmp_path)
48
+ memory.input_key = "question"
49
+ memory.output_key = "answer"
50
+
51
+ memory.save_context(
52
+ {"question": "the user budget is 2000 dollars", "irrelevant": "skip me"},
53
+ {"answer": "saved", "other": "ignore me"},
54
+ )
55
+ loaded = memory.load_memory_variables({"question": "what is the budget?"})
56
+
57
+ assert "2000 dollars" in loaded["history"]
58
+ assert "skip me" not in loaded["history"]
59
+ assert "ignore me" not in loaded["history"]
60
+
61
+
62
+ def test_wavemind_memory_clear_forgets_namespace(tmp_path):
63
+ memory = make_memory(tmp_path)
64
+
65
+ memory.save_context({"input": "Andrey likes short answers"}, {"output": "ok"})
66
+ assert "Andrey" in memory.load_memory_variables({"input": "short answers"})["history"]
67
+
68
+ memory.clear()
69
+
70
+ assert memory.load_memory_variables({"input": "short answers"})["history"] == ""
71
+
72
+
73
+ def test_langchain_example_runs_without_external_keys():
74
+ project_root = Path(__file__).resolve().parents[1]
75
+
76
+ result = subprocess.run(
77
+ [sys.executable, "examples/langchain_memory.py"],
78
+ cwd=project_root,
79
+ text=True,
80
+ encoding="utf-8",
81
+ capture_output=True,
82
+ check=True,
83
+ )
84
+
85
+ assert "WaveMindMemory history:" in result.stdout
86
+ assert "Andrey" in result.stdout
@@ -8,6 +8,20 @@ def test_sentence_extra_is_available_for_install_scripts():
8
8
  assert '"sentence-transformers>=3"' in pyproject
9
9
 
10
10
 
11
+ def test_benchmark_extra_installs_chroma():
12
+ pyproject = Path("pyproject.toml").read_text(encoding="utf-8")
13
+
14
+ assert "bench = [" in pyproject
15
+ assert '"chromadb>=1.0"' in pyproject
16
+
17
+
18
+ def test_langchain_extra_installs_classic_memory_api():
19
+ pyproject = Path("pyproject.toml").read_text(encoding="utf-8")
20
+
21
+ assert "langchain = [" in pyproject
22
+ assert '"langchain-classic>=1.0"' in pyproject
23
+
24
+
11
25
  def test_install_scripts_create_venv_and_install_sentence_extra():
12
26
  install_sh = Path("install.sh").read_text(encoding="utf-8")
13
27
  install_bat = Path("install.bat").read_text(encoding="utf-8")
@@ -0,0 +1,2 @@
1
+ """Optional integrations for external agent frameworks."""
2
+