wavemind 2.0.0__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavemind-2.0.0 → wavemind-2.0.1}/PKG-INFO +48 -3
- {wavemind-2.0.0 → wavemind-2.0.1}/README.md +43 -2
- {wavemind-2.0.0 → wavemind-2.0.1}/pyproject.toml +7 -1
- wavemind-2.0.1/tests/test_agent_memory_benchmark.py +74 -0
- wavemind-2.0.1/tests/test_langchain_integration.py +86 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_packaging_files.py +14 -0
- wavemind-2.0.1/wavemind/integrations/__init__.py +2 -0
- wavemind-2.0.1/wavemind/integrations/langchain.py +146 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/PKG-INFO +48 -3
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/SOURCES.txt +5 -1
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/requires.txt +6 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/LICENSE +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/setup.cfg +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_api.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_api_process_persistence.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_cli_smoke.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_core_persistence.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_examples.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_import_benchmark.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_indexes_encoders.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/tests/test_semantic_and_latency.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/__init__.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/__main__.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/api.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/benchmark.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/cli.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/core.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/encoders.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/importers.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/indexes.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind/storage.py +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/dependency_links.txt +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/entry_points.txt +0 -0
- {wavemind-2.0.0 → wavemind-2.0.1}/wavemind.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wavemind
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Persistent dynamic memory engine with vector search and wave-field re-ranking
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/CaspianG/wavemind
|
|
@@ -20,6 +20,10 @@ Requires-Dist: sentence-transformers>=3; extra == "ml"
|
|
|
20
20
|
Provides-Extra: indexes
|
|
21
21
|
Requires-Dist: annoy>=1.17; extra == "indexes"
|
|
22
22
|
Requires-Dist: faiss-cpu>=1.8; platform_system != "Windows" and extra == "indexes"
|
|
23
|
+
Provides-Extra: bench
|
|
24
|
+
Requires-Dist: chromadb>=1.0; extra == "bench"
|
|
25
|
+
Provides-Extra: langchain
|
|
26
|
+
Requires-Dist: langchain-classic>=1.0; extra == "langchain"
|
|
23
27
|
Provides-Extra: dev
|
|
24
28
|
Requires-Dist: pytest>=8; extra == "dev"
|
|
25
29
|
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
@@ -73,6 +77,29 @@ sh install.sh
|
|
|
73
77
|
install.bat
|
|
74
78
|
```
|
|
75
79
|
|
|
80
|
+
## LangChain Memory
|
|
81
|
+
|
|
82
|
+
Install the optional integration:
|
|
83
|
+
|
|
84
|
+
```sh
|
|
85
|
+
pip install "wavemind[langchain]"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Use WaveMind as a drop-in LangChain memory object:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from wavemind.integrations.langchain import WaveMindMemory
|
|
92
|
+
|
|
93
|
+
memory = WaveMindMemory(db_path="agent_memory.sqlite3")
|
|
94
|
+
# Replace: memory = ConversationBufferMemory()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Offline runnable example:
|
|
98
|
+
|
|
99
|
+
```sh
|
|
100
|
+
python examples/langchain_memory.py
|
|
101
|
+
```
|
|
102
|
+
|
|
76
103
|
## Benchmark
|
|
77
104
|
|
|
78
105
|
Real Russian sentences from Tatoeba, 50 one-word queries, NumPy exact index.
|
|
@@ -98,6 +125,23 @@ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encod
|
|
|
98
125
|
python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder sentence --index numpy
|
|
99
126
|
```
|
|
100
127
|
|
|
128
|
+
Agent-memory benchmark against Chroma:
|
|
129
|
+
|
|
130
|
+
200 Russian user facts, 50 natural-language questions, same precomputed `HashingTextEncoder` embeddings for WaveMind and Chroma.
|
|
131
|
+
Full machine-readable result: `benchmarks/agent_memory_results.json`.
|
|
132
|
+
|
|
133
|
+
| engine | precision@1 | precision@3 | avg latency |
|
|
134
|
+
|---|---:|---:|---:|
|
|
135
|
+
| WaveMind | 0.82 | 0.90 | 2.25 ms |
|
|
136
|
+
| Chroma | 0.82 | 0.88 | 0.93 ms |
|
|
137
|
+
|
|
138
|
+
Run locally:
|
|
139
|
+
|
|
140
|
+
```sh
|
|
141
|
+
pip install -e ".[bench]"
|
|
142
|
+
python benchmarks/agent_memory_benchmark.py --engines wavemind chroma --facts 200 --queries 50
|
|
143
|
+
```
|
|
144
|
+
|
|
101
145
|
## Comparison
|
|
102
146
|
|
|
103
147
|
| feature | WaveMind | Chroma | Qdrant |
|
|
@@ -118,12 +162,13 @@ WaveMind is not trying to replace dedicated vector databases at scale. Its diffe
|
|
|
118
162
|
- At 5000 records, one-word `precision@1` is currently 0.72 with the hash encoder; many misses are ambiguous queries where another sentence containing the same word ranks first.
|
|
119
163
|
- For `N > 5000`, use the FAISS backend with `--index faiss` or another production vector index.
|
|
120
164
|
- `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` requires about 420 MB of model files and measured about 53 ms per query on the benchmark machine.
|
|
121
|
-
- The
|
|
165
|
+
- The Chroma comparison currently uses shared precomputed hash embeddings to isolate retrieval/ranking behavior; semantic model comparisons should be run separately.
|
|
166
|
+
- In the 200-fact agent benchmark, Chroma is faster on average while WaveMind is slightly higher at `precision@3`.
|
|
122
167
|
|
|
123
168
|
## Roadmap
|
|
124
169
|
|
|
125
170
|
- FAISS-first production index path with persisted index rebuilds.
|
|
126
|
-
-
|
|
171
|
+
- Expand the agent-memory benchmark to sentence-transformers, FAISS, Chroma default embeddings, and Qdrant.
|
|
127
172
|
- Better semantic query expansion for short and ambiguous queries.
|
|
128
173
|
- Namespace quotas, backups, and daemon hardening for SaaS use.
|
|
129
174
|
- Webhook on recall for agent runtimes.
|
|
@@ -46,6 +46,29 @@ sh install.sh
|
|
|
46
46
|
install.bat
|
|
47
47
|
```
|
|
48
48
|
|
|
49
|
+
## LangChain Memory
|
|
50
|
+
|
|
51
|
+
Install the optional integration:
|
|
52
|
+
|
|
53
|
+
```sh
|
|
54
|
+
pip install "wavemind[langchain]"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Use WaveMind as a drop-in LangChain memory object:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from wavemind.integrations.langchain import WaveMindMemory
|
|
61
|
+
|
|
62
|
+
memory = WaveMindMemory(db_path="agent_memory.sqlite3")
|
|
63
|
+
# Replace: memory = ConversationBufferMemory()
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Offline runnable example:
|
|
67
|
+
|
|
68
|
+
```sh
|
|
69
|
+
python examples/langchain_memory.py
|
|
70
|
+
```
|
|
71
|
+
|
|
49
72
|
## Benchmark
|
|
50
73
|
|
|
51
74
|
Real Russian sentences from Tatoeba, 50 one-word queries, NumPy exact index.
|
|
@@ -71,6 +94,23 @@ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encod
|
|
|
71
94
|
python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder sentence --index numpy
|
|
72
95
|
```
|
|
73
96
|
|
|
97
|
+
Agent-memory benchmark against Chroma:
|
|
98
|
+
|
|
99
|
+
200 Russian user facts, 50 natural-language questions, same precomputed `HashingTextEncoder` embeddings for WaveMind and Chroma.
|
|
100
|
+
Full machine-readable result: `benchmarks/agent_memory_results.json`.
|
|
101
|
+
|
|
102
|
+
| engine | precision@1 | precision@3 | avg latency |
|
|
103
|
+
|---|---:|---:|---:|
|
|
104
|
+
| WaveMind | 0.82 | 0.90 | 2.25 ms |
|
|
105
|
+
| Chroma | 0.82 | 0.88 | 0.93 ms |
|
|
106
|
+
|
|
107
|
+
Run locally:
|
|
108
|
+
|
|
109
|
+
```sh
|
|
110
|
+
pip install -e ".[bench]"
|
|
111
|
+
python benchmarks/agent_memory_benchmark.py --engines wavemind chroma --facts 200 --queries 50
|
|
112
|
+
```
|
|
113
|
+
|
|
74
114
|
## Comparison
|
|
75
115
|
|
|
76
116
|
| feature | WaveMind | Chroma | Qdrant |
|
|
@@ -91,12 +131,13 @@ WaveMind is not trying to replace dedicated vector databases at scale. Its diffe
|
|
|
91
131
|
- At 5000 records, one-word `precision@1` is currently 0.72 with the hash encoder; many misses are ambiguous queries where another sentence containing the same word ranks first.
|
|
92
132
|
- For `N > 5000`, use the FAISS backend with `--index faiss` or another production vector index.
|
|
93
133
|
- `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` requires about 420 MB of model files and measured about 53 ms per query on the benchmark machine.
|
|
94
|
-
- The
|
|
134
|
+
- The Chroma comparison currently uses shared precomputed hash embeddings to isolate retrieval/ranking behavior; semantic model comparisons should be run separately.
|
|
135
|
+
- In the 200-fact agent benchmark, Chroma is faster on average while WaveMind is slightly higher at `precision@3`.
|
|
95
136
|
|
|
96
137
|
## Roadmap
|
|
97
138
|
|
|
98
139
|
- FAISS-first production index path with persisted index rebuilds.
|
|
99
|
-
-
|
|
140
|
+
- Expand the agent-memory benchmark to sentence-transformers, FAISS, Chroma default embeddings, and Qdrant.
|
|
100
141
|
- Better semantic query expansion for short and ambiguous queries.
|
|
101
142
|
- Namespace quotas, backups, and daemon hardening for SaaS use.
|
|
102
143
|
- Webhook on recall for agent runtimes.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "wavemind"
|
|
7
|
-
version = "2.0.0"
|
|
7
|
+
version = "2.0.1"
|
|
8
8
|
description = "Persistent dynamic memory engine with vector search and wave-field re-ranking"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -28,6 +28,12 @@ indexes = [
|
|
|
28
28
|
"annoy>=1.17",
|
|
29
29
|
"faiss-cpu>=1.8; platform_system != 'Windows'",
|
|
30
30
|
]
|
|
31
|
+
bench = [
|
|
32
|
+
"chromadb>=1.0",
|
|
33
|
+
]
|
|
34
|
+
langchain = [
|
|
35
|
+
"langchain-classic>=1.0",
|
|
36
|
+
]
|
|
31
37
|
dev = [
|
|
32
38
|
"pytest>=8",
|
|
33
39
|
"httpx>=0.27",
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_agent_memory_scenario_has_requested_shape():
|
|
9
|
+
from benchmarks.agent_memory_benchmark import build_agent_memory_scenario
|
|
10
|
+
|
|
11
|
+
scenario = build_agent_memory_scenario()
|
|
12
|
+
fact_ids = {fact.id for fact in scenario.facts}
|
|
13
|
+
|
|
14
|
+
assert len(scenario.facts) == 200
|
|
15
|
+
assert len(scenario.queries) == 50
|
|
16
|
+
assert len(fact_ids) == 200
|
|
17
|
+
assert all(query.expected_id in fact_ids for query in scenario.queries)
|
|
18
|
+
assert any("бюджет" in query.text.lower() for query in scenario.queries)
|
|
19
|
+
assert any("зовут" in query.text.lower() for query in scenario.queries)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_agent_memory_metrics_use_expected_fact_in_top_k():
|
|
23
|
+
from benchmarks.agent_memory_benchmark import AgentQuery, compute_metrics
|
|
24
|
+
|
|
25
|
+
queries = [
|
|
26
|
+
AgentQuery(id="q1", text="как зовут пользователя?", expected_id="fact_name"),
|
|
27
|
+
AgentQuery(id="q2", text="что знаем про бюджет?", expected_id="fact_budget"),
|
|
28
|
+
]
|
|
29
|
+
rankings = {
|
|
30
|
+
"q1": ["fact_name", "fact_role", "fact_budget"],
|
|
31
|
+
"q2": ["fact_role", "fact_budget", "fact_name"],
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
metrics = compute_metrics(queries, rankings, [1.0, 3.0])
|
|
35
|
+
|
|
36
|
+
assert metrics.precision_at_1 == 0.5
|
|
37
|
+
assert metrics.precision_at_3 == 1.0
|
|
38
|
+
assert metrics.avg_latency_ms == 2.0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_agent_memory_benchmark_cli_writes_json_for_wavemind(tmp_path):
|
|
42
|
+
output = tmp_path / "agent-memory-result.json"
|
|
43
|
+
project_root = Path(__file__).resolve().parents[1]
|
|
44
|
+
env = os.environ.copy()
|
|
45
|
+
env["PYTHONPATH"] = str(project_root) + os.pathsep + env.get("PYTHONPATH", "")
|
|
46
|
+
|
|
47
|
+
subprocess.run(
|
|
48
|
+
[
|
|
49
|
+
sys.executable,
|
|
50
|
+
"benchmarks/agent_memory_benchmark.py",
|
|
51
|
+
"--engines",
|
|
52
|
+
"wavemind",
|
|
53
|
+
"--facts",
|
|
54
|
+
"20",
|
|
55
|
+
"--queries",
|
|
56
|
+
"5",
|
|
57
|
+
"--output",
|
|
58
|
+
str(output),
|
|
59
|
+
],
|
|
60
|
+
cwd=project_root,
|
|
61
|
+
env=env,
|
|
62
|
+
text=True,
|
|
63
|
+
encoding="utf-8",
|
|
64
|
+
capture_output=True,
|
|
65
|
+
check=True,
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
payload = json.loads(output.read_text(encoding="utf-8"))
|
|
69
|
+
|
|
70
|
+
assert payload["scenario"]["facts"] == 20
|
|
71
|
+
assert payload["scenario"]["queries"] == 5
|
|
72
|
+
assert payload["results"][0]["engine"] == "WaveMind"
|
|
73
|
+
assert "precision_at_1" in payload["results"][0]
|
|
74
|
+
assert "avg_latency_ms" in payload["results"][0]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from wavemind import HashingTextEncoder, WaveMind
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def make_memory(tmp_path):
|
|
9
|
+
from wavemind.integrations.langchain import WaveMindMemory
|
|
10
|
+
|
|
11
|
+
mind = WaveMind(
|
|
12
|
+
db_path=tmp_path / "langchain.sqlite3",
|
|
13
|
+
encoder=HashingTextEncoder(vector_dim=128),
|
|
14
|
+
width=32,
|
|
15
|
+
height=32,
|
|
16
|
+
layers=2,
|
|
17
|
+
evolve_on_feed=1,
|
|
18
|
+
score_threshold=0.0,
|
|
19
|
+
)
|
|
20
|
+
return WaveMindMemory(memory=mind, top_k=3)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_wavemind_memory_exposes_langchain_base_memory_methods(tmp_path):
|
|
24
|
+
memory = make_memory(tmp_path)
|
|
25
|
+
|
|
26
|
+
assert memory.memory_variables == ["history"]
|
|
27
|
+
assert hasattr(memory, "load_memory_variables")
|
|
28
|
+
assert hasattr(memory, "save_context")
|
|
29
|
+
assert hasattr(memory, "clear")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_wavemind_memory_saves_context_and_recalls_relevant_history(tmp_path):
|
|
33
|
+
memory = make_memory(tmp_path)
|
|
34
|
+
|
|
35
|
+
memory.save_context(
|
|
36
|
+
{"input": "my name is Andrey and I am a trader"},
|
|
37
|
+
{"output": "remembered"},
|
|
38
|
+
)
|
|
39
|
+
loaded = memory.load_memory_variables({"input": "what is my name?"})
|
|
40
|
+
|
|
41
|
+
assert set(loaded) == {"history"}
|
|
42
|
+
assert "Andrey" in loaded["history"]
|
|
43
|
+
assert "trader" in loaded["history"]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_wavemind_memory_supports_custom_input_output_keys(tmp_path):
|
|
47
|
+
memory = make_memory(tmp_path)
|
|
48
|
+
memory.input_key = "question"
|
|
49
|
+
memory.output_key = "answer"
|
|
50
|
+
|
|
51
|
+
memory.save_context(
|
|
52
|
+
{"question": "the user budget is 2000 dollars", "irrelevant": "skip me"},
|
|
53
|
+
{"answer": "saved", "other": "ignore me"},
|
|
54
|
+
)
|
|
55
|
+
loaded = memory.load_memory_variables({"question": "what is the budget?"})
|
|
56
|
+
|
|
57
|
+
assert "2000 dollars" in loaded["history"]
|
|
58
|
+
assert "skip me" not in loaded["history"]
|
|
59
|
+
assert "ignore me" not in loaded["history"]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_wavemind_memory_clear_forgets_namespace(tmp_path):
|
|
63
|
+
memory = make_memory(tmp_path)
|
|
64
|
+
|
|
65
|
+
memory.save_context({"input": "Andrey likes short answers"}, {"output": "ok"})
|
|
66
|
+
assert "Andrey" in memory.load_memory_variables({"input": "short answers"})["history"]
|
|
67
|
+
|
|
68
|
+
memory.clear()
|
|
69
|
+
|
|
70
|
+
assert memory.load_memory_variables({"input": "short answers"})["history"] == ""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_langchain_example_runs_without_external_keys():
|
|
74
|
+
project_root = Path(__file__).resolve().parents[1]
|
|
75
|
+
|
|
76
|
+
result = subprocess.run(
|
|
77
|
+
[sys.executable, "examples/langchain_memory.py"],
|
|
78
|
+
cwd=project_root,
|
|
79
|
+
text=True,
|
|
80
|
+
encoding="utf-8",
|
|
81
|
+
capture_output=True,
|
|
82
|
+
check=True,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
assert "WaveMindMemory history:" in result.stdout
|
|
86
|
+
assert "Andrey" in result.stdout
|
|
@@ -8,6 +8,20 @@ def test_sentence_extra_is_available_for_install_scripts():
|
|
|
8
8
|
assert '"sentence-transformers>=3"' in pyproject
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
def test_benchmark_extra_installs_chroma():
|
|
12
|
+
pyproject = Path("pyproject.toml").read_text(encoding="utf-8")
|
|
13
|
+
|
|
14
|
+
assert "bench = [" in pyproject
|
|
15
|
+
assert '"chromadb>=1.0"' in pyproject
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_langchain_extra_installs_classic_memory_api():
|
|
19
|
+
pyproject = Path("pyproject.toml").read_text(encoding="utf-8")
|
|
20
|
+
|
|
21
|
+
assert "langchain = [" in pyproject
|
|
22
|
+
assert '"langchain-classic>=1.0"' in pyproject
|
|
23
|
+
|
|
24
|
+
|
|
11
25
|
def test_install_scripts_create_venv_and_install_sentence_extra():
|
|
12
26
|
install_sh = Path("install.sh").read_text(encoding="utf-8")
|
|
13
27
|
install_bat = Path("install.bat").read_text(encoding="utf-8")
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import ConfigDict
|
|
8
|
+
|
|
9
|
+
from wavemind import WaveMind
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from langchain_classic.base_memory import BaseMemory
|
|
14
|
+
except ImportError:
|
|
15
|
+
try:
|
|
16
|
+
from langchain.schema import BaseMemory
|
|
17
|
+
except ImportError:
|
|
18
|
+
|
|
19
|
+
class BaseMemory:
|
|
20
|
+
"""Small fallback so the integration can be imported without LangChain."""
|
|
21
|
+
|
|
22
|
+
def __init__(self, **data: Any):
|
|
23
|
+
for key, value in data.items():
|
|
24
|
+
setattr(self, key, value)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class WaveMindMemory(BaseMemory):
|
|
28
|
+
"""LangChain BaseMemory implementation backed by WaveMind."""
|
|
29
|
+
|
|
30
|
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
31
|
+
|
|
32
|
+
memory: WaveMind
|
|
33
|
+
db_path: str | Path | None = None
|
|
34
|
+
memory_key: str = "history"
|
|
35
|
+
input_key: str | None = None
|
|
36
|
+
output_key: str | None = None
|
|
37
|
+
namespace: str = "langchain"
|
|
38
|
+
tags: tuple[str, ...] = ("langchain", "conversation")
|
|
39
|
+
top_k: int = 5
|
|
40
|
+
min_score: float | None = None
|
|
41
|
+
human_prefix: str = "Human"
|
|
42
|
+
ai_prefix: str = "AI"
|
|
43
|
+
include_scores: bool = False
|
|
44
|
+
max_context_chars: int = 4000
|
|
45
|
+
|
|
46
|
+
def __init__(self, **data: Any):
|
|
47
|
+
if data.get("memory") is None:
|
|
48
|
+
data["memory"] = WaveMind(db_path=data.get("db_path"))
|
|
49
|
+
if "tags" in data and data["tags"] is not None:
|
|
50
|
+
data["tags"] = tuple(data["tags"])
|
|
51
|
+
super().__init__(**data)
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def memory_variables(self) -> list[str]:
|
|
55
|
+
return [self.memory_key]
|
|
56
|
+
|
|
57
|
+
def load_memory_variables(self, inputs: dict[str, Any]) -> dict[str, Any]:
|
|
58
|
+
query = self._select_input_text(inputs, allow_empty=True)
|
|
59
|
+
if not query:
|
|
60
|
+
return {self.memory_key: ""}
|
|
61
|
+
|
|
62
|
+
results = self.memory.query(
|
|
63
|
+
query,
|
|
64
|
+
namespace=self.namespace,
|
|
65
|
+
tags=self.tags,
|
|
66
|
+
top_k=self.top_k,
|
|
67
|
+
min_score=self.min_score,
|
|
68
|
+
)
|
|
69
|
+
lines = []
|
|
70
|
+
for index, result in enumerate(results, start=1):
|
|
71
|
+
prefix = f"[{index}]"
|
|
72
|
+
if self.include_scores:
|
|
73
|
+
prefix = f"{prefix} ({result.score:.2f})"
|
|
74
|
+
lines.append(f"{prefix} {result.text}")
|
|
75
|
+
return {self.memory_key: self._truncate("\n".join(lines))}
|
|
76
|
+
|
|
77
|
+
def save_context(self, inputs: dict[str, Any], outputs: dict[str, str]) -> None:
|
|
78
|
+
input_text = self._select_input_text(inputs)
|
|
79
|
+
output_text = self._select_output_text(outputs)
|
|
80
|
+
turn = f"{self.human_prefix}: {input_text}\n{self.ai_prefix}: {output_text}"
|
|
81
|
+
self.memory.remember(
|
|
82
|
+
turn,
|
|
83
|
+
namespace=self.namespace,
|
|
84
|
+
tags=self.tags,
|
|
85
|
+
metadata={
|
|
86
|
+
"kind": "langchain_conversation_turn",
|
|
87
|
+
"input": input_text,
|
|
88
|
+
"output": output_text,
|
|
89
|
+
},
|
|
90
|
+
priority=1.2,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
def clear(self) -> None:
|
|
94
|
+
self.memory.forget(namespace=self.namespace)
|
|
95
|
+
|
|
96
|
+
def _select_input_text(self, inputs: dict[str, Any], allow_empty: bool = False) -> str:
|
|
97
|
+
if self.input_key is not None:
|
|
98
|
+
return self._stringify(inputs.get(self.input_key, ""))
|
|
99
|
+
|
|
100
|
+
candidate_keys = [key for key in inputs if key not in self.memory_variables]
|
|
101
|
+
for preferred in ("input", "question", "query", "prompt"):
|
|
102
|
+
if preferred in candidate_keys:
|
|
103
|
+
return self._stringify(inputs[preferred])
|
|
104
|
+
|
|
105
|
+
if len(candidate_keys) == 1:
|
|
106
|
+
return self._stringify(inputs[candidate_keys[0]])
|
|
107
|
+
if not candidate_keys and allow_empty:
|
|
108
|
+
return ""
|
|
109
|
+
raise ValueError(
|
|
110
|
+
"Could not infer the LangChain input key. Set input_key explicitly."
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def _select_output_text(self, outputs: dict[str, Any]) -> str:
|
|
114
|
+
if self.output_key is not None:
|
|
115
|
+
return self._stringify(outputs.get(self.output_key, ""))
|
|
116
|
+
|
|
117
|
+
for preferred in ("output", "answer", "response", "text"):
|
|
118
|
+
if preferred in outputs:
|
|
119
|
+
return self._stringify(outputs[preferred])
|
|
120
|
+
|
|
121
|
+
if len(outputs) == 1:
|
|
122
|
+
return self._stringify(next(iter(outputs.values())))
|
|
123
|
+
raise ValueError(
|
|
124
|
+
"Could not infer the LangChain output key. Set output_key explicitly."
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
def _truncate(self, text: str) -> str:
|
|
128
|
+
if self.max_context_chars <= 0 or len(text) <= self.max_context_chars:
|
|
129
|
+
return text
|
|
130
|
+
return text[: self.max_context_chars].rstrip()
|
|
131
|
+
|
|
132
|
+
def _stringify(self, value: Any) -> str:
|
|
133
|
+
if value is None:
|
|
134
|
+
return ""
|
|
135
|
+
if isinstance(value, str):
|
|
136
|
+
return value
|
|
137
|
+
if hasattr(value, "content"):
|
|
138
|
+
return str(value.content)
|
|
139
|
+
if isinstance(value, (list, tuple)):
|
|
140
|
+
return "\n".join(self._stringify(item) for item in value)
|
|
141
|
+
if isinstance(value, dict):
|
|
142
|
+
return json.dumps(value, ensure_ascii=False, sort_keys=True)
|
|
143
|
+
return str(value)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
__all__ = ["WaveMindMemory"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wavemind
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.1
|
|
4
4
|
Summary: Persistent dynamic memory engine with vector search and wave-field re-ranking
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/CaspianG/wavemind
|
|
@@ -20,6 +20,10 @@ Requires-Dist: sentence-transformers>=3; extra == "ml"
|
|
|
20
20
|
Provides-Extra: indexes
|
|
21
21
|
Requires-Dist: annoy>=1.17; extra == "indexes"
|
|
22
22
|
Requires-Dist: faiss-cpu>=1.8; platform_system != "Windows" and extra == "indexes"
|
|
23
|
+
Provides-Extra: bench
|
|
24
|
+
Requires-Dist: chromadb>=1.0; extra == "bench"
|
|
25
|
+
Provides-Extra: langchain
|
|
26
|
+
Requires-Dist: langchain-classic>=1.0; extra == "langchain"
|
|
23
27
|
Provides-Extra: dev
|
|
24
28
|
Requires-Dist: pytest>=8; extra == "dev"
|
|
25
29
|
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
@@ -73,6 +77,29 @@ sh install.sh
|
|
|
73
77
|
install.bat
|
|
74
78
|
```
|
|
75
79
|
|
|
80
|
+
## LangChain Memory
|
|
81
|
+
|
|
82
|
+
Install the optional integration:
|
|
83
|
+
|
|
84
|
+
```sh
|
|
85
|
+
pip install "wavemind[langchain]"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Use WaveMind as a drop-in LangChain memory object:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from wavemind.integrations.langchain import WaveMindMemory
|
|
92
|
+
|
|
93
|
+
memory = WaveMindMemory(db_path="agent_memory.sqlite3")
|
|
94
|
+
# Replace: memory = ConversationBufferMemory()
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Offline runnable example:
|
|
98
|
+
|
|
99
|
+
```sh
|
|
100
|
+
python examples/langchain_memory.py
|
|
101
|
+
```
|
|
102
|
+
|
|
76
103
|
## Benchmark
|
|
77
104
|
|
|
78
105
|
Real Russian sentences from Tatoeba, 50 one-word queries, NumPy exact index.
|
|
@@ -98,6 +125,23 @@ python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encod
|
|
|
98
125
|
python benchmarks/ru_sentences_benchmark.py --sentences 200 --queries 50 --encoder sentence --index numpy
|
|
99
126
|
```
|
|
100
127
|
|
|
128
|
+
Agent-memory benchmark against Chroma:
|
|
129
|
+
|
|
130
|
+
200 Russian user facts, 50 natural-language questions, same precomputed `HashingTextEncoder` embeddings for WaveMind and Chroma.
|
|
131
|
+
Full machine-readable result: `benchmarks/agent_memory_results.json`.
|
|
132
|
+
|
|
133
|
+
| engine | precision@1 | precision@3 | avg latency |
|
|
134
|
+
|---|---:|---:|---:|
|
|
135
|
+
| WaveMind | 0.82 | 0.90 | 2.25 ms |
|
|
136
|
+
| Chroma | 0.82 | 0.88 | 0.93 ms |
|
|
137
|
+
|
|
138
|
+
Run locally:
|
|
139
|
+
|
|
140
|
+
```sh
|
|
141
|
+
pip install -e ".[bench]"
|
|
142
|
+
python benchmarks/agent_memory_benchmark.py --engines wavemind chroma --facts 200 --queries 50
|
|
143
|
+
```
|
|
144
|
+
|
|
101
145
|
## Comparison
|
|
102
146
|
|
|
103
147
|
| feature | WaveMind | Chroma | Qdrant |
|
|
@@ -118,12 +162,13 @@ WaveMind is not trying to replace dedicated vector databases at scale. Its diffe
|
|
|
118
162
|
- At 5000 records, one-word `precision@1` is currently 0.72 with the hash encoder; many misses are ambiguous queries where another sentence containing the same word ranks first.
|
|
119
163
|
- For `N > 5000`, use the FAISS backend with `--index faiss` or another production vector index.
|
|
120
164
|
- `sentence-transformers/paraphrase-multilingual-mpnet-base-v2` requires about 420 MB of model files and measured about 53 ms per query on the benchmark machine.
|
|
121
|
-
- The
|
|
165
|
+
- The Chroma comparison currently uses shared precomputed hash embeddings to isolate retrieval/ranking behavior; semantic model comparisons should be run separately.
|
|
166
|
+
- In the 200-fact agent benchmark, Chroma is faster on average while WaveMind is slightly higher at `precision@3`.
|
|
122
167
|
|
|
123
168
|
## Roadmap
|
|
124
169
|
|
|
125
170
|
- FAISS-first production index path with persisted index rebuilds.
|
|
126
|
-
-
|
|
171
|
+
- Expand the agent-memory benchmark to sentence-transformers, FAISS, Chroma default embeddings, and Qdrant.
|
|
127
172
|
- Better semantic query expansion for short and ambiguous queries.
|
|
128
173
|
- Namespace quotas, backups, and daemon hardening for SaaS use.
|
|
129
174
|
- Webhook on recall for agent runtimes.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
LICENSE
|
|
2
2
|
README.md
|
|
3
3
|
pyproject.toml
|
|
4
|
+
tests/test_agent_memory_benchmark.py
|
|
4
5
|
tests/test_api.py
|
|
5
6
|
tests/test_api_process_persistence.py
|
|
6
7
|
tests/test_cli_smoke.py
|
|
@@ -8,6 +9,7 @@ tests/test_core_persistence.py
|
|
|
8
9
|
tests/test_examples.py
|
|
9
10
|
tests/test_import_benchmark.py
|
|
10
11
|
tests/test_indexes_encoders.py
|
|
12
|
+
tests/test_langchain_integration.py
|
|
11
13
|
tests/test_packaging_files.py
|
|
12
14
|
tests/test_semantic_and_latency.py
|
|
13
15
|
wavemind/__init__.py
|
|
@@ -25,4 +27,6 @@ wavemind.egg-info/SOURCES.txt
|
|
|
25
27
|
wavemind.egg-info/dependency_links.txt
|
|
26
28
|
wavemind.egg-info/entry_points.txt
|
|
27
29
|
wavemind.egg-info/requires.txt
|
|
28
|
-
wavemind.egg-info/top_level.txt
|
|
30
|
+
wavemind.egg-info/top_level.txt
|
|
31
|
+
wavemind/integrations/__init__.py
|
|
32
|
+
wavemind/integrations/langchain.py
|
|
@@ -4,6 +4,9 @@ uvicorn[standard]>=0.27
|
|
|
4
4
|
pydantic>=2
|
|
5
5
|
pypdf>=4
|
|
6
6
|
|
|
7
|
+
[bench]
|
|
8
|
+
chromadb>=1.0
|
|
9
|
+
|
|
7
10
|
[dev]
|
|
8
11
|
pytest>=8
|
|
9
12
|
httpx>=0.27
|
|
@@ -14,6 +17,9 @@ annoy>=1.17
|
|
|
14
17
|
[indexes:platform_system != "Windows"]
|
|
15
18
|
faiss-cpu>=1.8
|
|
16
19
|
|
|
20
|
+
[langchain]
|
|
21
|
+
langchain-classic>=1.0
|
|
22
|
+
|
|
17
23
|
[ml]
|
|
18
24
|
sentence-transformers>=3
|
|
19
25
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|