wavemind 2.2.5__tar.gz → 2.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wavemind-2.2.5 → wavemind-2.2.7}/PKG-INFO +75 -13
- {wavemind-2.2.5 → wavemind-2.2.7}/README.md +74 -12
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/BENCHMARK_LEADERBOARD.md +1 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/BENCHMARK_REPORT.md +1 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/benchmark_matrix_results.json +34 -7
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/benchmark_registry.py +39 -3
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/scale_readiness_benchmark.py +143 -2
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/scale_readiness_results.json +29 -8
- {wavemind-2.2.5 → wavemind-2.2.7}/docker-compose.yml +1 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/BENCHMARK_BRIEF.md +1 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/ROADMAP.md +8 -5
- {wavemind-2.2.5 → wavemind-2.2.7}/pyproject.toml +1 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_replication.py +175 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_scale_readiness_benchmark.py +7 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/__init__.py +7 -1
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/replication.py +504 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/PKG-INFO +75 -13
- {wavemind-2.2.5 → wavemind-2.2.7}/CONTRIBUTING.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/Dockerfile +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/LICENSE +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/MANIFEST.in +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/SECURITY.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/SUPPORT.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/agent_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/agent_memory_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/ann_index_curve_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/ann_index_curve_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/dynamic_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/dynamic_memory_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/field_memory_dynamics_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/field_memory_dynamics_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/locomo_evidence_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/locomo_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/locomo_sentence_evidence_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/long_memory_evidence_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/long_memory_evidence_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_answer_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_answer_extractive_20_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_answer_qwen25_0_5b_50_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_answer_qwen25_1_5b_50_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_evidence_50_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_evidence_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/longmemeval_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/memory_competitor_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/memory_competitor_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/nomiracl_russian_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/nomiracl_russian_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/open_retrieval_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/open_retrieval_scifact_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_index_profile_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_qdrant_100k_tuned_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_qdrant_1m_ef_sweep_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_qdrant_1m_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_qdrant_1m_tuned_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/production_load_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/render_benchmark_charts.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/render_benchmark_leaderboard.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/render_benchmark_report.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/ru_sentences_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/benchmarks/wavemind_capacity_results.json +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/CHROMA_MIGRATION.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/DEMO_SCRIPT.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/LAUNCH_KIT.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/OBSERVABILITY.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/PROJECT_BOARD.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/RELEASE.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/RU_LAUNCH_POSTS.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/USE_CASES.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/assets/benchmark-summary.svg +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/assets/wavemind-demo.gif +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/docs/assets/wavemind-social-card.svg +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/agent_with_memory.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/chroma_migration.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/customer_support_memory.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/demo.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/dynamic_memory_demo.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/framework_integrations.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/langchain_memory.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/llamaindex_retriever.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/observability/README.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/observability/docker-compose.yml +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/observability/otel-collector.yaml +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/observability/prometheus-alerts.yml +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/observability/prometheus.yml +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/production-index-profile/README.md +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/production-index-profile/docker-compose.yml +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/research_notebook_memory.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/examples/sharded_memory.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/install.bat +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/install.sh +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/requirements-optional.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/requirements.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/setup.cfg +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_agent_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_ann_index_curve_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_api.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_api_process_persistence.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_benchmark_brief.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_benchmark_charts.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_benchmark_leaderboard.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_benchmark_registry.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_benchmark_report.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_chroma_migration_example.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_cli_smoke.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_cluster.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_core_persistence.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_dynamic_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_examples.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_field_graph.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_field_graph_integration.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_field_memory_dynamics_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_framework_adapters.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_import_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_indexes_encoders.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_jobs.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_langchain_integration.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_locomo_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_long_memory_evidence_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_longmemeval_answer_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_longmemeval_memory_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_memory_competitor_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_multimodal.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_nomiracl_russian_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_observability.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_observability_docs.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_open_retrieval_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_packaging_files.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_postgres_storage.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_production_index_profile.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_production_load_benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_scale_plan.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_semantic_and_latency.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/tests/test_sharding.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/__main__.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/api.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/benchmark.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/cli.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/cluster.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/core.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/encoders.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/field_graph.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/importers.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/indexes.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/__init__.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/autogen.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/crewai.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/langchain.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/langgraph.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/integrations/llamaindex.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/jobs.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/multimodal.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/observability.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/scale.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/sharding.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/storage.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind/studio.py +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/SOURCES.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/dependency_links.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/entry_points.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/requires.txt +0 -0
- {wavemind-2.2.5 → wavemind-2.2.7}/wavemind.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wavemind
|
|
3
|
-
Version: 2.2.5
|
|
3
|
+
Version: 2.2.7
|
|
4
4
|
Summary: Local-first dynamic memory field with vector search and wave-field re-ranking
|
|
5
5
|
License-Expression: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/CaspianG/wavemind
|
|
@@ -542,14 +542,17 @@ Checked-in result:
|
|
|
542
542
|
| profile | result |
|
|
543
543
|
|---|---:|
|
|
544
544
|
| Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, write quorum `2`. |
|
|
545
|
-
| Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.
|
|
546
|
-
| Replicated runtime | 3 physical WaveMind stores, replication factor 3, write quorum 2, node-loss recall `true`, repair copied `1` missing record, tombstone repair deleted `1` stale record, p99 query-after-loss `1.
|
|
547
|
-
|
|
|
545
|
+
| Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.003 ms`. |
|
|
546
|
+
| Replicated runtime | 3 physical WaveMind stores, replication factor 3, write quorum 2, node-loss recall `true`, repair copied `1` missing record, tombstone repair deleted `1` stale record, p99 query-after-loss `1.29 ms`. |
|
|
547
|
+
| Active-active delta sync | 2 regions, bidirectional convergence `true`, stale import suppressed after delete `true`, tombstone convergence `true`, sync `112.50 ms`. |
|
|
548
|
+
| Replicated snapshot | 3 replica files, manifest checksum validation `true`, restore `11.42 ms`, recall after restored-primary loss `true`. |
|
|
549
|
+
| Structured payloads | image/audio/table/event retrieval, precision@1 `1.000`, p99 `0.67 ms`. |
|
|
548
550
|
|
|
549
551
|
This profile validates routing, quorum-replicated runtime behavior, cache
|
|
550
|
-
behavior,
|
|
551
|
-
|
|
552
|
-
|
|
552
|
+
behavior, active-active namespace delta sync, replicated snapshot/restore, and
|
|
553
|
+
structured payload handling. It is not a 10M-vector load test. Real 100k, 1M,
|
|
554
|
+
and 10M latency claims should come from service-backed FAISS/Qdrant/pgvector
|
|
555
|
+
load tests on production-like hardware.
|
|
553
556
|
|
|
554
557
|
Cluster placement planning:
|
|
555
558
|
|
|
@@ -668,6 +671,34 @@ For Postgres storage, use database-native backup tooling such as `pg_dump`,
|
|
|
668
671
|
managed snapshots, or point-in-time recovery instead of WaveMind's SQLite file
|
|
669
672
|
backup command.
|
|
670
673
|
|
|
674
|
+
Replicated runtime snapshot/restore:
|
|
675
|
+
|
|
676
|
+
```python
|
|
677
|
+
from wavemind import HashingTextEncoder, ReplicatedWaveMind
|
|
678
|
+
|
|
679
|
+
memory = ReplicatedWaveMind(
|
|
680
|
+
root_path="./state/replicas",
|
|
681
|
+
nodes=["node-a", "node-b", "node-c"],
|
|
682
|
+
replication_factor=3,
|
|
683
|
+
encoder=HashingTextEncoder(vector_dim=64),
|
|
684
|
+
)
|
|
685
|
+
memory.remember("Tenant A prefers short support replies.", namespace="tenant:a")
|
|
686
|
+
|
|
687
|
+
snapshot = memory.snapshot("./backups/replicated")
|
|
688
|
+
assert ReplicatedWaveMind.verify_snapshot(snapshot.snapshot_path)["healthy"]
|
|
689
|
+
|
|
690
|
+
restored, report = ReplicatedWaveMind.restore_snapshot(
|
|
691
|
+
snapshot.snapshot_path,
|
|
692
|
+
"./state/restored-replicas",
|
|
693
|
+
encoder=HashingTextEncoder(vector_dim=64),
|
|
694
|
+
)
|
|
695
|
+
```
|
|
696
|
+
|
|
697
|
+
The replicated snapshot writes one SQLite backup per replica plus
|
|
698
|
+
`manifest.json` with SHA-256 checksums, replica metadata, quorum settings, and
|
|
699
|
+
node definitions. Restore refuses to overwrite a non-empty root unless
|
|
700
|
+
`overwrite=True` is passed.
|
|
701
|
+
|
|
671
702
|
## HTTP API
|
|
672
703
|
|
|
673
704
|
Run the local FastAPI server:
|
|
@@ -1041,7 +1072,7 @@ Current read:
|
|
|
1041
1072
|
| LongMemEval 50-query smoke | On the first 50 non-abstention LongMemEval-S questions, WaveMind reaches `evidence_recall@5 0.920`, `precision@1 0.760`, and `MRR@5 0.827`; Chroma/Qdrant static reach `0.600`, `0.260`, and `0.385`. | This is the fast regression profile for checking current changes before rerunning the full LongMemEval profile. WaveMind wins on quality; latency still needs work. |
|
|
1042
1073
|
| ANN/index curve | At 50000 generated 128-d vectors, NumPy exact keeps `recall@10 1.000` at `6.49 ms`; quantized int8 keeps `0.934` at `24.92 ms`; Annoy is faster at `4.92 ms` but drops to `0.730` recall; Qdrant local keeps `1.000` recall at `43.49 ms`. | Current local scale boundary is clear: quantized search needs kernel work, Annoy needs tuning/FAISS, and Qdrant should be tested in service mode for a fair production comparison. |
|
|
1043
1074
|
| Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.28 ms`, p99 `21.26 ms`. At 1M, tuned Qdrant reaches `recall@10 0.984`, avg `116.80 ms`, p99 `209.28 ms`; an EF sweep finds `recall@10 0.977`, avg `64.76 ms`, p99 `103.77 ms` at `hnsw_ef=2048` on 30 queries. | 100k is production-grade on the tested machine. 1M recall is now strong, but p99 still needs tuning before claiming a stable sub-100 ms SLO. |
|
|
1044
|
-
| Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, quorum-replicated runtime recall after node loss, missing-record repair, tombstone repair, and structured payload precision@1 `1.000`. | This proves routing, cache, payload,
|
|
1075
|
+
| Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, quorum-replicated runtime recall after node loss, missing-record repair, tombstone repair, active-active delta sync, checksummed replicated snapshot/restore, and structured payload precision@1 `1.000`. | This proves routing, cache, payload, replicated-runtime, namespace-delta, and restore-drill foundations. It is not a 10M-vector latency claim; real 10M latency still needs service-backed load tests on larger hardware. |
|
|
1045
1076
|
| Memory competitor adapters | WaveMind reaches `precision@1 0.80`, `precision@3 1.00`, stale suppression `1.00` on the small adapter profile. Mem0, Zep, and LangGraph are listed as skipped unless their real packages/services are configured. | This prevents fake competitor claims. The adapter harness is ready; real Mem0/Zep/LangGraph results still need configured installs. |
|
|
1046
1077
|
| LongMemEval local answer generation | With the same local Ollama `qwen2.5:1.5b`, WaveMind reaches `exact_match 0.240`, `contains_answer 0.380`, `token_f1 0.333`, and `evidence_recall@5 0.920`; Chroma and Qdrant static both reach `0.120`, `0.160`, `0.170`, and `0.600`. | This is the first checked-in end-to-end answer benchmark against Chroma/Qdrant. It is still a 50-question lightweight smoke run, not a full LongMemEval leaderboard score. |
|
|
1047
1078
|
|
|
@@ -1060,7 +1091,7 @@ Current read:
|
|
|
1060
1091
|
| Production index profile | Docker-backed 50000-vector profile for persisted FAISS, Qdrant service, and PostgreSQL/pgvector HNSW. | implemented | FAISS / Qdrant service / pgvector | Keep service-mode candidate generation above `0.95` recall@10 and below 10 ms average query latency at 50000 vectors. |
|
|
1061
1092
|
| Production load profile | 100k and 1M service-backed candidate-index checks with p95/p99 latency. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | Keep 100k at recall@10 `1.000`; push 1M p99 below 100 ms with recall@10 >= 0.95. |
|
|
1062
1093
|
| Qdrant 1M HNSW ef sweep | One 1M Qdrant collection queried with multiple `hnsw_ef` values. | implemented | Qdrant service | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable 1M SLO. |
|
|
1063
|
-
| Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, replicated runtime, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Keep quorum replication
|
|
1094
|
+
| Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, replicated runtime, active-active delta sync, replicated snapshot/restore, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Keep quorum replication, namespace-delta sync, repair, and restore drills green while adding larger service-backed 10M load tests. |
|
|
1064
1095
|
| Memory competitor adapter profile | Dynamic-memory scenario wired for external memory frameworks. | implemented | Mem0 / Zep / LangGraph persistent memory | Report real competitor results only when their packages/services are explicitly configured. |
|
|
1065
1096
|
| [BEIR](https://github.com/beir-cellar/beir) | Standard zero-shot information retrieval quality. | planned | Chroma / Qdrant / FAISS | Stay within 0.02 `nDCG@10` on identical embeddings. |
|
|
1066
1097
|
| [MTEB Retrieval](https://github.com/embeddings-benchmark/mteb) | Separates encoder quality from retrieval-store quality. | planned | Chroma / Qdrant / FAISS | Prove WaveMind does not reduce same-embedding retrieval quality. |
|
|
@@ -1220,6 +1251,36 @@ production foundation for namespace-level HA and eventual-consistency behavior;
|
|
|
1220
1251
|
for full consensus across independent network services, deploy WaveMind with
|
|
1221
1252
|
Postgres/Qdrant/ops-layer replication.
|
|
1222
1253
|
|
|
1254
|
+
For multi-region active-active experiments, export and import namespace deltas:
|
|
1255
|
+
|
|
1256
|
+
```python
|
|
1257
|
+
region_a.remember("Tenant A billing preference.", namespace="tenant:a")
|
|
1258
|
+
delta = region_a.export_namespace_delta("tenant:a")
|
|
1259
|
+
region_b.import_namespace_delta(delta)
|
|
1260
|
+
|
|
1261
|
+
region_a.forget(text="Tenant A billing preference.", namespace="tenant:a")
|
|
1262
|
+
region_b.import_namespace_delta(region_a.export_namespace_delta("tenant:a"))
|
|
1263
|
+
```
|
|
1264
|
+
|
|
1265
|
+
The delta contains active records plus tombstones. Import is idempotent and
|
|
1266
|
+
tombstone-aware, so a stale region export cannot resurrect a deleted memory.
|
|
1267
|
+
|
|
1268
|
+
For operational recovery, `snapshot()` creates a checksummed replicated snapshot
|
|
1269
|
+
and `restore_snapshot()` restores it into a fresh replica root:
|
|
1270
|
+
|
|
1271
|
+
```python
|
|
1272
|
+
snapshot = memory.snapshot("./backups/replicated")
|
|
1273
|
+
health = ReplicatedWaveMind.verify_snapshot(snapshot.snapshot_path)
|
|
1274
|
+
restored, report = ReplicatedWaveMind.restore_snapshot(
|
|
1275
|
+
snapshot.snapshot_path,
|
|
1276
|
+
"./state/restored-replicas",
|
|
1277
|
+
)
|
|
1278
|
+
```
|
|
1279
|
+
|
|
1280
|
+
The checked-in scale-readiness profile verifies manifest checksums, restores
|
|
1281
|
+
three replica files, then disables the restored primary and confirms the memory
|
|
1282
|
+
is still recalled from the remaining replicas.
|
|
1283
|
+
|
|
1223
1284
|
Checked-in official LoCoMo retrieval result:
|
|
1224
1285
|
|
|
1225
1286
|
10 conversations, 5882 memory turns, 1977 evidence-labeled questions,
|
|
@@ -1568,8 +1629,9 @@ If you already use Chroma for local memory, see the practical migration guide:
|
|
|
1568
1629
|
- The dynamic benchmark currently compares WaveMind against a static Chroma baseline. Chroma and Qdrant can implement similar behavior with extra application-layer metadata policy, deletes, filters, and reinforcement logic.
|
|
1569
1630
|
- `MemoryFieldGraph` is a discrete graph over stored memories, not a continuous mathematical field. Its current build path should be optimized with incremental edge updates before large production use.
|
|
1570
1631
|
- pgvector is a candidate-index backend. PostgreSQL source-of-truth storage is
|
|
1571
|
-
also available separately, but migrations, PITR docs, and service benchmark
|
|
1572
|
-
profiles still need more real deployment coverage.
|
|
1632
|
+
also available separately, but migrations, PITR docs, scheduled backup
|
|
1633
|
+
runbooks, and service benchmark profiles still need more real deployment
|
|
1634
|
+
coverage.
|
|
1573
1635
|
- The Qdrant backend is also a candidate-index backend. WaveMind rebuilds it
|
|
1574
1636
|
from SQLite on load/build, so large service-mode deployments still need a
|
|
1575
1637
|
measured rebuild strategy and index-health monitoring.
|
|
@@ -1608,8 +1670,8 @@ Near-term priorities:
|
|
|
1608
1670
|
- Faster dynamic re-ranking through smaller candidate windows, caching, and
|
|
1609
1671
|
background updates.
|
|
1610
1672
|
- Better production operations: OpenTelemetry is optional and implemented;
|
|
1611
|
-
richer latency histograms, index-health metrics, alerting examples,
|
|
1612
|
-
|
|
1673
|
+
richer latency histograms, index-health metrics, alerting examples, scheduled
|
|
1674
|
+
offsite snapshots, and Postgres PITR runbooks are next.
|
|
1613
1675
|
|
|
1614
1676
|
Longer-term direction:
|
|
1615
1677
|
|
|
@@ -489,14 +489,17 @@ Checked-in result:
|
|
|
489
489
|
| profile | result |
|
|
490
490
|
|---|---:|
|
|
491
491
|
| Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, write quorum `2`. |
|
|
492
|
-
| Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.
|
|
493
|
-
| Replicated runtime | 3 physical WaveMind stores, replication factor 3, write quorum 2, node-loss recall `true`, repair copied `1` missing record, tombstone repair deleted `1` stale record, p99 query-after-loss `1.
|
|
494
|
-
|
|
|
492
|
+
| Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.003 ms`. |
|
|
493
|
+
| Replicated runtime | 3 physical WaveMind stores, replication factor 3, write quorum 2, node-loss recall `true`, repair copied `1` missing record, tombstone repair deleted `1` stale record, p99 query-after-loss `1.29 ms`. |
|
|
494
|
+
| Active-active delta sync | 2 regions, bidirectional convergence `true`, stale import suppressed after delete `true`, tombstone convergence `true`, sync `112.50 ms`. |
|
|
495
|
+
| Replicated snapshot | 3 replica files, manifest checksum validation `true`, restore `11.42 ms`, recall after restored-primary loss `true`. |
|
|
496
|
+
| Structured payloads | image/audio/table/event retrieval, precision@1 `1.000`, p99 `0.67 ms`. |
|
|
495
497
|
|
|
496
498
|
This profile validates routing, quorum-replicated runtime behavior, cache
|
|
497
|
-
behavior,
|
|
498
|
-
|
|
499
|
-
|
|
499
|
+
behavior, active-active namespace delta sync, replicated snapshot/restore, and
|
|
500
|
+
structured payload handling. It is not a 10M-vector load test. Real 100k, 1M,
|
|
501
|
+
and 10M latency claims should come from service-backed FAISS/Qdrant/pgvector
|
|
502
|
+
load tests on production-like hardware.
|
|
500
503
|
|
|
501
504
|
Cluster placement planning:
|
|
502
505
|
|
|
@@ -615,6 +618,34 @@ For Postgres storage, use database-native backup tooling such as `pg_dump`,
|
|
|
615
618
|
managed snapshots, or point-in-time recovery instead of WaveMind's SQLite file
|
|
616
619
|
backup command.
|
|
617
620
|
|
|
621
|
+
Replicated runtime snapshot/restore:
|
|
622
|
+
|
|
623
|
+
```python
|
|
624
|
+
from wavemind import HashingTextEncoder, ReplicatedWaveMind
|
|
625
|
+
|
|
626
|
+
memory = ReplicatedWaveMind(
|
|
627
|
+
root_path="./state/replicas",
|
|
628
|
+
nodes=["node-a", "node-b", "node-c"],
|
|
629
|
+
replication_factor=3,
|
|
630
|
+
encoder=HashingTextEncoder(vector_dim=64),
|
|
631
|
+
)
|
|
632
|
+
memory.remember("Tenant A prefers short support replies.", namespace="tenant:a")
|
|
633
|
+
|
|
634
|
+
snapshot = memory.snapshot("./backups/replicated")
|
|
635
|
+
assert ReplicatedWaveMind.verify_snapshot(snapshot.snapshot_path)["healthy"]
|
|
636
|
+
|
|
637
|
+
restored, report = ReplicatedWaveMind.restore_snapshot(
|
|
638
|
+
snapshot.snapshot_path,
|
|
639
|
+
"./state/restored-replicas",
|
|
640
|
+
encoder=HashingTextEncoder(vector_dim=64),
|
|
641
|
+
)
|
|
642
|
+
```
|
|
643
|
+
|
|
644
|
+
The replicated snapshot writes one SQLite backup per replica plus
|
|
645
|
+
`manifest.json` with SHA-256 checksums, replica metadata, quorum settings, and
|
|
646
|
+
node definitions. Restore refuses to overwrite a non-empty root unless
|
|
647
|
+
`overwrite=True` is passed.
|
|
648
|
+
|
|
618
649
|
## HTTP API
|
|
619
650
|
|
|
620
651
|
Run the local FastAPI server:
|
|
@@ -988,7 +1019,7 @@ Current read:
|
|
|
988
1019
|
| LongMemEval 50-query smoke | On the first 50 non-abstention LongMemEval-S questions, WaveMind reaches `evidence_recall@5 0.920`, `precision@1 0.760`, and `MRR@5 0.827`; Chroma/Qdrant static reach `0.600`, `0.260`, and `0.385`. | This is the fast regression profile for checking current changes before rerunning the full LongMemEval profile. WaveMind wins on quality; latency still needs work. |
|
|
989
1020
|
| ANN/index curve | At 50000 generated 128-d vectors, NumPy exact keeps `recall@10 1.000` at `6.49 ms`; quantized int8 keeps `0.934` at `24.92 ms`; Annoy is faster at `4.92 ms` but drops to `0.730` recall; Qdrant local keeps `1.000` recall at `43.49 ms`. | Current local scale boundary is clear: quantized search needs kernel work, Annoy needs tuning/FAISS, and Qdrant should be tested in service mode for a fair production comparison. |
|
|
990
1021
|
| Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.28 ms`, p99 `21.26 ms`. At 1M, tuned Qdrant reaches `recall@10 0.984`, avg `116.80 ms`, p99 `209.28 ms`; an EF sweep finds `recall@10 0.977`, avg `64.76 ms`, p99 `103.77 ms` at `hnsw_ef=2048` on 30 queries. | 100k is production-grade on the tested machine. 1M recall is now strong, but p99 still needs tuning before claiming a stable sub-100 ms SLO. |
|
|
991
|
-
| Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, quorum-replicated runtime recall after node loss, missing-record repair, tombstone repair, and structured payload precision@1 `1.000`. | This proves routing, cache, payload,
|
|
1022
|
+
| Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, quorum-replicated runtime recall after node loss, missing-record repair, tombstone repair, active-active delta sync, checksummed replicated snapshot/restore, and structured payload precision@1 `1.000`. | This proves routing, cache, payload, replicated-runtime, namespace-delta, and restore-drill foundations. It is not a 10M-vector latency claim; real 10M latency still needs service-backed load tests on larger hardware. |
|
|
992
1023
|
| Memory competitor adapters | WaveMind reaches `precision@1 0.80`, `precision@3 1.00`, stale suppression `1.00` on the small adapter profile. Mem0, Zep, and LangGraph are listed as skipped unless their real packages/services are configured. | This prevents fake competitor claims. The adapter harness is ready; real Mem0/Zep/LangGraph results still need configured installs. |
|
|
993
1024
|
| LongMemEval local answer generation | With the same local Ollama `qwen2.5:1.5b`, WaveMind reaches `exact_match 0.240`, `contains_answer 0.380`, `token_f1 0.333`, and `evidence_recall@5 0.920`; Chroma and Qdrant static both reach `0.120`, `0.160`, `0.170`, and `0.600`. | This is the first checked-in end-to-end answer benchmark against Chroma/Qdrant. It is still a 50-question lightweight smoke run, not a full LongMemEval leaderboard score. |
|
|
994
1025
|
|
|
@@ -1007,7 +1038,7 @@ Current read:
|
|
|
1007
1038
|
| Production index profile | Docker-backed 50000-vector profile for persisted FAISS, Qdrant service, and PostgreSQL/pgvector HNSW. | implemented | FAISS / Qdrant service / pgvector | Keep service-mode candidate generation above `0.95` recall@10 and below 10 ms average query latency at 50000 vectors. |
|
|
1008
1039
|
| Production load profile | 100k and 1M service-backed candidate-index checks with p95/p99 latency. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | Keep 100k at recall@10 `1.000`; push 1M p99 below 100 ms with recall@10 >= 0.95. |
|
|
1009
1040
|
| Qdrant 1M HNSW ef sweep | One 1M Qdrant collection queried with multiple `hnsw_ef` values. | implemented | Qdrant service | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable 1M SLO. |
|
|
1010
|
-
| Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, replicated runtime, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Keep quorum replication
|
|
1041
|
+
| Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, replicated runtime, active-active delta sync, replicated snapshot/restore, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Keep quorum replication, namespace-delta sync, repair, and restore drills green while adding larger service-backed 10M load tests. |
|
|
1011
1042
|
| Memory competitor adapter profile | Dynamic-memory scenario wired for external memory frameworks. | implemented | Mem0 / Zep / LangGraph persistent memory | Report real competitor results only when their packages/services are explicitly configured. |
|
|
1012
1043
|
| [BEIR](https://github.com/beir-cellar/beir) | Standard zero-shot information retrieval quality. | planned | Chroma / Qdrant / FAISS | Stay within 0.02 `nDCG@10` on identical embeddings. |
|
|
1013
1044
|
| [MTEB Retrieval](https://github.com/embeddings-benchmark/mteb) | Separates encoder quality from retrieval-store quality. | planned | Chroma / Qdrant / FAISS | Prove WaveMind does not reduce same-embedding retrieval quality. |
|
|
@@ -1167,6 +1198,36 @@ production foundation for namespace-level HA and eventual-consistency behavior;
|
|
|
1167
1198
|
for full consensus across independent network services, deploy WaveMind with
|
|
1168
1199
|
Postgres/Qdrant/ops-layer replication.
|
|
1169
1200
|
|
|
1201
|
+
For multi-region active-active experiments, export and import namespace deltas:
|
|
1202
|
+
|
|
1203
|
+
```python
|
|
1204
|
+
region_a.remember("Tenant A billing preference.", namespace="tenant:a")
|
|
1205
|
+
delta = region_a.export_namespace_delta("tenant:a")
|
|
1206
|
+
region_b.import_namespace_delta(delta)
|
|
1207
|
+
|
|
1208
|
+
region_a.forget(text="Tenant A billing preference.", namespace="tenant:a")
|
|
1209
|
+
region_b.import_namespace_delta(region_a.export_namespace_delta("tenant:a"))
|
|
1210
|
+
```
|
|
1211
|
+
|
|
1212
|
+
The delta contains active records plus tombstones. Import is idempotent and
|
|
1213
|
+
tombstone-aware, so a stale region export cannot resurrect a deleted memory.
|
|
1214
|
+
|
|
1215
|
+
For operational recovery, `snapshot()` creates a checksummed replicated snapshot
|
|
1216
|
+
and `restore_snapshot()` restores it into a fresh replica root:
|
|
1217
|
+
|
|
1218
|
+
```python
|
|
1219
|
+
snapshot = memory.snapshot("./backups/replicated")
|
|
1220
|
+
health = ReplicatedWaveMind.verify_snapshot(snapshot.snapshot_path)
|
|
1221
|
+
restored, report = ReplicatedWaveMind.restore_snapshot(
|
|
1222
|
+
snapshot.snapshot_path,
|
|
1223
|
+
"./state/restored-replicas",
|
|
1224
|
+
)
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
The checked-in scale-readiness profile verifies manifest checksums, restores
|
|
1228
|
+
three replica files, then disables the restored primary and confirms the memory
|
|
1229
|
+
is still recalled from the remaining replicas.
|
|
1230
|
+
|
|
1170
1231
|
Checked-in official LoCoMo retrieval result:
|
|
1171
1232
|
|
|
1172
1233
|
10 conversations, 5882 memory turns, 1977 evidence-labeled questions,
|
|
@@ -1515,8 +1576,9 @@ If you already use Chroma for local memory, see the practical migration guide:
|
|
|
1515
1576
|
- The dynamic benchmark currently compares WaveMind against a static Chroma baseline. Chroma and Qdrant can implement similar behavior with extra application-layer metadata policy, deletes, filters, and reinforcement logic.
|
|
1516
1577
|
- `MemoryFieldGraph` is a discrete graph over stored memories, not a continuous mathematical field. Its current build path should be optimized with incremental edge updates before large production use.
|
|
1517
1578
|
- pgvector is a candidate-index backend. PostgreSQL source-of-truth storage is
|
|
1518
|
-
also available separately, but migrations, PITR docs,
|
|
1519
|
-
profiles still need more real deployment
|
|
1579
|
+
also available separately, but migrations, PITR docs, scheduled backup
|
|
1580
|
+
runbooks, and service benchmark profiles still need more real deployment
|
|
1581
|
+
coverage.
|
|
1520
1582
|
- The Qdrant backend is also a candidate-index backend. WaveMind rebuilds it
|
|
1521
1583
|
from SQLite on load/build, so large service-mode deployments still need a
|
|
1522
1584
|
measured rebuild strategy and index-health monitoring.
|
|
@@ -1555,8 +1617,8 @@ Near-term priorities:
|
|
|
1555
1617
|
- Faster dynamic re-ranking through smaller candidate windows, caching, and
|
|
1556
1618
|
background updates.
|
|
1557
1619
|
- Better production operations: OpenTelemetry is optional and implemented;
|
|
1558
|
-
richer latency histograms, index-health metrics, alerting examples,
|
|
1559
|
-
|
|
1620
|
+
richer latency histograms, index-health metrics, alerting examples, scheduled
|
|
1621
|
+
offsite snapshots, and Postgres PITR runbooks are next.
|
|
1560
1622
|
|
|
1561
1623
|
Longer-term direction:
|
|
1562
1624
|
|
|
@@ -21,7 +21,7 @@ This is a compact reader-facing view of checked-in benchmark results. It is not
|
|
|
21
21
|
| Production load profile 100k | production-scale | Recall@k | WaveMind pgvector: 0.736 / 17.8 ms | Qdrant service: 1 / 10.3 ms | Baseline leads on quality |
|
|
22
22
|
| Production load profile 1M | production-scale | Recall@k | - | Qdrant service: 0.984 / 116.8 ms | No WaveMind result |
|
|
23
23
|
| Qdrant 1M HNSW ef sweep | production-scale | Recall@k | - | hnsw_ef=2048: 0.977 / 64.8 ms | No WaveMind result |
|
|
24
|
-
| Scale readiness profile | production-scale | precision@1 | WaveMind structured payloads: 1 / 0.
|
|
24
|
+
| Scale readiness profile | production-scale | precision@1 | WaveMind structured payloads: 1 / 0.448 ms | - | WaveMind-only check |
|
|
25
25
|
| Memory competitor adapter profile | agent-memory | precision@1 | WaveMind: 0.8 / 0.554 ms | - | WaveMind-only check |
|
|
26
26
|
| [LongMemEval answer generation](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | token F1 | WaveMind + qwen2.5:1.5b: 0.333 / - | Chroma static + qwen2.5:1.5b: 0.17 / - | WaveMind leads on quality |
|
|
27
27
|
|
|
@@ -24,7 +24,7 @@ Planned rows are not claimed wins. They are the public proof path WaveMind must
|
|
|
24
24
|
| Production load profile 100k | production-scale | implemented | Qdrant service: Recall@k 1.00, avg latency 10.3, p95 latency 19.0, p99 latency ms 21.3, build ms 27439.3<br>WaveMind pgvector: Recall@k 0.74, avg latency 17.8, p95 latency 23.5, build ms 455703.7<br>WaveMind faiss-persisted: skipped - Set WAVEMIND_FAISS_PATH to use the persisted FAISS backend | Tune pgvector HNSW build/search parameters and add persisted FAISS from the Linux benchmark container. |
|
|
25
25
|
| Production load profile 1M | production-scale | implemented | Qdrant service: Recall@k 0.98, avg latency 116.8, p95 latency 153.8, p99 latency ms 209.3, build ms 450674.6 | Tune Qdrant indexing/search params further, then add FAISS IVF/HNSW and pgvector 1M profiles on a larger disk. |
|
|
26
26
|
| Qdrant 1M HNSW ef sweep | production-scale | implemented | hnsw_ef=512: Recall@k 0.75, avg latency 47.2, p95 latency 68.5, p99 latency ms 68.5, max latency ms 68.5<br>hnsw_ef=768: Recall@k 0.85, avg latency 44.0, p95 latency 69.1, p99 latency ms 69.8, max latency ms 69.8<br>hnsw_ef=1024: Recall@k 0.88, avg latency 62.9, p95 latency 81.1, p99 latency ms 85.5, max latency ms 85.5<br>hnsw_ef=1536: Recall@k 0.94, avg latency 65.6, p95 latency 111.2, p99 latency ms 119.7, max latency ms 119.7<br>hnsw_ef=2048: Recall@k 0.98, avg latency 64.8, p95 latency 91.2, p99 latency ms 103.8, max latency ms 103.8 | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable production SLO. |
|
|
27
|
-
| Scale readiness profile | production-scale | implemented | WaveMind cluster planner: simulated memories 1000000, namespaces 4096, nodes 4, replication factor 2, node loss min availability 1.00, zone loss min availability 1.00, read quorum 1, write quorum 2, placement ms
|
|
27
|
+
| Scale readiness profile | production-scale | implemented | WaveMind cluster planner: simulated memories 1000000, namespaces 4096, nodes 4, replication factor 2, node loss min availability 1.00, zone loss min availability 1.00, read quorum 1, write quorum 2, placement ms 59.2<br>WaveMind hot cache: queries 2000, capacity 512, hit rate 0.92, evictions 0, p99 lookup ms 0.00<br>WaveMind replicated runtime: nodes 3, replication factor 3, write quorum 2, read quorum 1, recalled after node loss True, repair copied records 1, tombstone repair deleted records 1, p99 query after loss ms 1.29<br>WaveMind active-active delta sync: regions 2, replication factor per region 3, records imported 6, converged after bidirectional sync True, suppressed stale import after delete True, tombstone converged True, sync ms 112.5<br>WaveMind replicated snapshot: nodes 3, manifest healthy True, restored files 3, recalled after restore node loss True, snapshot ms 49.2, restore ms 11.4<br>WaveMind structured payloads: queries 4, precision@1 1.00, avg latency 0.45, p99 latency ms 0.67 | Move from local replicated runtime to service-backed replicated runs, scheduled/offsite snapshots, and larger 10M candidate-index load tests. |
|
|
28
28
|
| Memory competitor adapter profile | agent-memory | implemented | WaveMind: precision@1 0.80, precision@3 1.00, stale suppression 1.00, avg latency 0.55, p95 latency 0.83<br>Mem0: skipped - Install Mem0 to run this adapter profile: pip install "mem0ai"<br>Zep: skipped - Install the Zep client package and set ZEP_API_KEY or ZEP_API_URL.<br>LangGraph persistent memory: skipped - Install LangGraph to run this adapter profile: pip install "langgraph" | Add documented setup commands for each competitor adapter and store checked-in results only when those real adapters run. |
|
|
29
29
|
| [LongMemEval answer generation](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | implemented | extractive smoke: queries 20, evidence recall@k 1.00, exact match 0.00, contains answer 0.05, token f1 0.02, avg retrieval ms 3.79, avg generation ms 0.77<br>WaveMind + qwen2.5:0.5b: queries 50, evidence recall@k 0.92, exact match 0.12, contains answer 0.18, token f1 0.18, avg retrieval ms 2.98, avg generation ms 1428.2<br>Chroma static + qwen2.5:0.5b: queries 50, evidence recall@k 0.60, exact match 0.10, contains answer 0.12, token f1 0.13, avg retrieval ms 4.10, avg generation ms 1234.7<br>Qdrant static + qwen2.5:0.5b: queries 50, evidence recall@k 0.60, exact match 0.10, contains answer 0.12, token f1 0.13, avg retrieval ms 63.8, avg generation ms 893.5<br>WaveMind + qwen2.5:1.5b: queries 50, evidence recall@k 0.92, exact match 0.24, contains answer 0.38, token f1 0.33, avg retrieval ms 2.00, avg generation ms 2153.0<br>Chroma static + qwen2.5:1.5b: queries 50, evidence recall@k 0.60, exact match 0.12, contains answer 0.16, token f1 0.17, avg retrieval ms 7.05, avg generation ms 2082.4<br>Qdrant static + qwen2.5:1.5b: queries 50, evidence recall@k 0.60, exact match 0.12, contains answer 0.16, token f1 0.17, avg retrieval ms 100.2, avg generation ms 758.1 | Run all 470 non-abstention questions with a stronger local/API model and add faithfulness/abstention scoring. |
|
|
30
30
|
|
|
@@ -741,7 +741,7 @@
|
|
|
741
741
|
"category": "production-scale",
|
|
742
742
|
"status": "implemented",
|
|
743
743
|
"source": "benchmarks/scale_readiness_benchmark.py",
|
|
744
|
-
"dataset": "Deterministic 1M-memory simulation for namespace placement
|
|
744
|
+
"dataset": "Deterministic 1M-memory simulation for namespace placement, quorum runtime, active-active delta sync, replicated snapshot/restore, hot-cache, and structured-payload retrieval checks.",
|
|
745
745
|
"competitors": [
|
|
746
746
|
"Mem0",
|
|
747
747
|
"Zep",
|
|
@@ -764,24 +764,51 @@
|
|
|
764
764
|
"zone_loss_min_availability": 1.0,
|
|
765
765
|
"read_quorum": 1,
|
|
766
766
|
"write_quorum": 2,
|
|
767
|
-
"placement_ms":
|
|
767
|
+
"placement_ms": 59.205800003837794
|
|
768
768
|
},
|
|
769
769
|
"WaveMind hot cache": {
|
|
770
770
|
"queries": 2000,
|
|
771
771
|
"capacity": 512,
|
|
772
772
|
"hit_rate": 0.92,
|
|
773
773
|
"evictions": 0,
|
|
774
|
-
"p99_lookup_ms": 0.
|
|
774
|
+
"p99_lookup_ms": 0.002700020559132099
|
|
775
|
+
},
|
|
776
|
+
"WaveMind replicated runtime": {
|
|
777
|
+
"nodes": 3,
|
|
778
|
+
"replication_factor": 3,
|
|
779
|
+
"write_quorum": 2,
|
|
780
|
+
"read_quorum": 1,
|
|
781
|
+
"recalled_after_node_loss": true,
|
|
782
|
+
"repair_copied_records": 1,
|
|
783
|
+
"tombstone_repair_deleted_records": 1,
|
|
784
|
+
"p99_query_after_loss_ms": 1.2885000323876739
|
|
785
|
+
},
|
|
786
|
+
"WaveMind active-active delta sync": {
|
|
787
|
+
"regions": 2,
|
|
788
|
+
"replication_factor_per_region": 3,
|
|
789
|
+
"records_imported": 6,
|
|
790
|
+
"converged_after_bidirectional_sync": true,
|
|
791
|
+
"suppressed_stale_import_after_delete": true,
|
|
792
|
+
"tombstone_converged": true,
|
|
793
|
+
"sync_ms": 112.49550001230091
|
|
794
|
+
},
|
|
795
|
+
"WaveMind replicated snapshot": {
|
|
796
|
+
"nodes": 3,
|
|
797
|
+
"manifest_healthy": true,
|
|
798
|
+
"restored_files": 3,
|
|
799
|
+
"recalled_after_restore_node_loss": true,
|
|
800
|
+
"snapshot_ms": 49.16400002548471,
|
|
801
|
+
"restore_ms": 11.4187000435777
|
|
775
802
|
},
|
|
776
803
|
"WaveMind structured payloads": {
|
|
777
804
|
"queries": 4,
|
|
778
805
|
"precision_at_1": 1.0,
|
|
779
|
-
"avg_latency_ms": 0.
|
|
780
|
-
"p99_latency_ms":
|
|
806
|
+
"avg_latency_ms": 0.44750000233761966,
|
|
807
|
+
"p99_latency_ms": 0.6680000224150717
|
|
781
808
|
}
|
|
782
809
|
},
|
|
783
|
-
"target": "Prove the production foundation before heavier 100k, 1M, and 10M vector load tests: deterministic placement, survivable replicas, hot-cache behavior, and structured payload recall.",
|
|
784
|
-
"next_step": "Move from
|
|
810
|
+
"target": "Prove the production foundation before heavier 100k, 1M, and 10M vector load tests: deterministic placement, survivable replicas, active-active sync, restore drills, hot-cache behavior, and structured payload recall.",
|
|
811
|
+
"next_step": "Move from local replicated runtime to service-backed replicated runs, scheduled/offsite snapshots, and larger 10M candidate-index load tests."
|
|
785
812
|
},
|
|
786
813
|
{
|
|
787
814
|
"id": "memory_competitor_adapter_profile",
|
|
@@ -702,7 +702,7 @@ def _implemented_entries(root: Path) -> list[dict[str, Any]]:
|
|
|
702
702
|
"category": "production-scale",
|
|
703
703
|
"status": "implemented",
|
|
704
704
|
"source": "benchmarks/scale_readiness_benchmark.py",
|
|
705
|
-
"dataset": "Deterministic 1M-memory simulation for namespace placement
|
|
705
|
+
"dataset": "Deterministic 1M-memory simulation for namespace placement, quorum runtime, active-active delta sync, replicated snapshot/restore, hot-cache, and structured-payload retrieval checks.",
|
|
706
706
|
"competitors": ["Mem0", "Zep", "LangGraph persistent memory", "GraphRAG"],
|
|
707
707
|
"metrics": [
|
|
708
708
|
"node_loss_min_availability",
|
|
@@ -735,6 +735,42 @@ def _implemented_entries(root: Path) -> list[dict[str, Any]]:
|
|
|
735
735
|
"p99_lookup_ms",
|
|
736
736
|
),
|
|
737
737
|
),
|
|
738
|
+
"WaveMind replicated runtime": _metric_summary(
|
|
739
|
+
scale_readiness_results.get("WaveMind replicated runtime"),
|
|
740
|
+
(
|
|
741
|
+
"nodes",
|
|
742
|
+
"replication_factor",
|
|
743
|
+
"write_quorum",
|
|
744
|
+
"read_quorum",
|
|
745
|
+
"recalled_after_node_loss",
|
|
746
|
+
"repair_copied_records",
|
|
747
|
+
"tombstone_repair_deleted_records",
|
|
748
|
+
"p99_query_after_loss_ms",
|
|
749
|
+
),
|
|
750
|
+
),
|
|
751
|
+
"WaveMind active-active delta sync": _metric_summary(
|
|
752
|
+
scale_readiness_results.get("WaveMind active-active delta sync"),
|
|
753
|
+
(
|
|
754
|
+
"regions",
|
|
755
|
+
"replication_factor_per_region",
|
|
756
|
+
"records_imported",
|
|
757
|
+
"converged_after_bidirectional_sync",
|
|
758
|
+
"suppressed_stale_import_after_delete",
|
|
759
|
+
"tombstone_converged",
|
|
760
|
+
"sync_ms",
|
|
761
|
+
),
|
|
762
|
+
),
|
|
763
|
+
"WaveMind replicated snapshot": _metric_summary(
|
|
764
|
+
scale_readiness_results.get("WaveMind replicated snapshot"),
|
|
765
|
+
(
|
|
766
|
+
"nodes",
|
|
767
|
+
"manifest_healthy",
|
|
768
|
+
"restored_files",
|
|
769
|
+
"recalled_after_restore_node_loss",
|
|
770
|
+
"snapshot_ms",
|
|
771
|
+
"restore_ms",
|
|
772
|
+
),
|
|
773
|
+
),
|
|
738
774
|
"WaveMind structured payloads": _metric_summary(
|
|
739
775
|
scale_readiness_results.get("WaveMind structured payloads"),
|
|
740
776
|
(
|
|
@@ -745,8 +781,8 @@ def _implemented_entries(root: Path) -> list[dict[str, Any]]:
|
|
|
745
781
|
),
|
|
746
782
|
),
|
|
747
783
|
},
|
|
748
|
-
"target": "Prove the production foundation before heavier 100k, 1M, and 10M vector load tests: deterministic placement, survivable replicas, hot-cache behavior, and structured payload recall.",
|
|
749
|
-
"next_step": "Move from
|
|
784
|
+
"target": "Prove the production foundation before heavier 100k, 1M, and 10M vector load tests: deterministic placement, survivable replicas, active-active sync, restore drills, hot-cache behavior, and structured payload recall.",
|
|
785
|
+
"next_step": "Move from local replicated runtime to service-backed replicated runs, scheduled/offsite snapshots, and larger 10M candidate-index load tests.",
|
|
750
786
|
},
|
|
751
787
|
{
|
|
752
788
|
"id": "memory_competitor_adapter_profile",
|
|
@@ -238,6 +238,138 @@ def run_replication_runtime_profile() -> dict[str, object]:
|
|
|
238
238
|
memory.close()
|
|
239
239
|
|
|
240
240
|
|
|
241
|
+
def run_active_active_delta_profile() -> dict[str, object]:
    """Exercise bidirectional namespace-delta sync between two replicated regions.

    Two three-node ``ReplicatedWaveMind`` regions each remember one record in
    the same namespace and then exchange namespace deltas in both directions.
    Afterwards region A forgets its own record, re-imports a delta that region
    B exported before the delete, and finally ships a fresh delta to region B.

    Returns:
        A result row with the ``engine`` label, the region/replication
        settings, the summed ``imported_records`` of both imports, the three
        boolean checks (convergence, stale-import suppression, tombstone
        convergence), the ``deleted_records`` count reported by region B's
        final import, and the wall-clock time of the bidirectional exchange in
        milliseconds.
    """
    namespace = "tenant:active-active"
    text_a = "region a billing preference"
    text_b = "region b support preference"

    def _nodes(region: str, base_port: int) -> list[dict[str, str]]:
        # One node per zone; ports are base_port + 1..3 (8101-8103 / 8201-8203).
        return [
            {
                "id": f"region-{region}-{index}",
                "address": f"127.0.0.1:{base_port + index}",
                "zone": f"zone-{zone}",
            }
            for index, zone in enumerate("abc", start=1)
        ]

    def _recalls(region, query_text: str, expected_text: str) -> bool:
        # top_k=3 matches the stale/tombstone checks below and leaves room for
        # both records of the namespace to be returned.
        return any(
            result.text == expected_text
            for result in region.query(query_text, namespace=namespace, top_k=3)
        )

    def _lacks(region, query_text: str, unwanted_text: str) -> bool:
        return all(
            result.text != unwanted_text
            for result in region.query(query_text, namespace=namespace, top_k=3)
        )

    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        # Both regions share the same engine settings (and encoder instance).
        engine_kwargs = {
            "replication_factor": 3,
            "width": 16,
            "height": 16,
            "layers": 1,
            "encoder": HashingTextEncoder(vector_dim=64),
        }
        region_a = ReplicatedWaveMind(
            root_path=root / "region-a",
            nodes=_nodes("a", 8100),
            **engine_kwargs,
        )
        # Nested try/finally: region A is closed even if building region B
        # fails, and region B is closed even if closing region A would raise.
        try:
            region_b = ReplicatedWaveMind(
                root_path=root / "region-b",
                nodes=_nodes("b", 8200),
                **engine_kwargs,
            )
            try:
                region_a.remember(text_a, namespace=namespace)
                region_b.remember(text_b, namespace=namespace)

                # Only the two export/import round trips are timed.
                sync_started = time.perf_counter()
                import_b = region_b.import_namespace_delta(
                    region_a.export_namespace_delta(namespace)
                )
                import_a = region_a.import_namespace_delta(
                    region_b.export_namespace_delta(namespace)
                )
                sync_ms = (time.perf_counter() - sync_started) * 1000.0

                # Each region must recall the record written by the *other*
                # region. A bare "query returned something" check would pass
                # without any sync, because every region already holds its own
                # record in this namespace.
                converged = _recalls(
                    region_a, "support preference", text_b
                ) and _recalls(region_b, "billing preference", text_a)

                # Exported before the delete, so this delta is expected to
                # still carry region A's record as active.
                stale_delta = region_b.export_namespace_delta(namespace)
                region_a.forget(text=text_a, namespace=namespace)
                region_a.import_namespace_delta(stale_delta)
                suppressed_stale_import = _lacks(
                    region_a, "billing preference", text_a
                )

                # Propagate the delete to region B through a fresh delta.
                tombstone_delta = region_a.export_namespace_delta(namespace)
                tombstone_report = region_b.import_namespace_delta(tombstone_delta)
                tombstone_converged = _lacks(
                    region_b, "billing preference", text_a
                )

                return {
                    "engine": "WaveMind active-active delta sync",
                    "regions": 2,
                    "replication_factor_per_region": 3,
                    "records_imported": (
                        import_a.imported_records + import_b.imported_records
                    ),
                    "converged_after_bidirectional_sync": bool(converged),
                    "sync_ms": sync_ms,
                    "suppressed_stale_import_after_delete": suppressed_stale_import,
                    "tombstone_deleted_records": tombstone_report.deleted_records,
                    "tombstone_converged": tombstone_converged,
                }
            finally:
                region_b.close()
        finally:
            region_a.close()
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def run_replicated_snapshot_profile() -> dict[str, object]:
    """Run a snapshot / verify / restore drill on a three-node replicated memory.

    One record is written to a ``ReplicatedWaveMind`` cluster, the cluster is
    snapshotted, the snapshot is verified, and it is restored into a separate
    root. The restored cluster then has the namespace's primary node marked
    unavailable and is queried again.

    Returns:
        A result row with the ``engine`` label, the number of nodes in the
        snapshot, the ``healthy`` flag from verification, the snapshot's
        ``total_bytes``, snapshot and restore timings in milliseconds, the
        number of restored files, and whether the query after disabling the
        restored primary returned any result.
    """
    namespace = "tenant:snapshot"

    def _engine_settings() -> dict[str, object]:
        # Fresh dict (and fresh encoder) per call so the source and restored
        # clusters do not share an encoder instance.
        return {
            "width": 16,
            "height": 16,
            "layers": 1,
            "encoder": HashingTextEncoder(vector_dim=64),
        }

    with tempfile.TemporaryDirectory() as directory:
        root = Path(directory)
        memory = ReplicatedWaveMind(
            root_path=root / "replicas",
            nodes=[
                {"id": "node-a", "address": "127.0.0.1:8101", "zone": "zone-a"},
                {"id": "node-b", "address": "127.0.0.1:8102", "zone": "zone-b"},
                {"id": "node-c", "address": "127.0.0.1:8103", "zone": "zone-c"},
            ],
            replication_factor=3,
            **_engine_settings(),
        )
        restored = None
        try:
            memory.remember(
                "replicated snapshot restore survives node loss",
                namespace=namespace,
            )

            snapshot_started = time.perf_counter()
            snapshot = memory.snapshot(root / "snapshots")
            snapshot_ms = (time.perf_counter() - snapshot_started) * 1000.0
            # Verification is deliberately outside both timed sections.
            health = ReplicatedWaveMind.verify_snapshot(snapshot.snapshot_path)

            restore_started = time.perf_counter()
            restored, restore = ReplicatedWaveMind.restore_snapshot(
                snapshot.snapshot_path,
                root / "restored",
                **_engine_settings(),
            )
            restore_ms = (time.perf_counter() - restore_started) * 1000.0

            # Take the restored primary offline; the query below can then only
            # be served if the restore produced usable non-primary replicas.
            placement = restored.placement(namespace)
            restored.set_node_available(placement.primary, False)
            recalled_after_restore_loss = bool(
                restored.query(
                    "snapshot restore node loss", namespace=namespace, top_k=1
                )
            )

            return {
                "engine": "WaveMind replicated snapshot",
                "nodes": len(snapshot.nodes),
                "manifest_healthy": health["healthy"],
                "total_bytes": snapshot.total_bytes,
                "snapshot_ms": snapshot_ms,
                "restore_ms": restore_ms,
                "restored_files": len(restore.restored_files),
                "recalled_after_restore_node_loss": recalled_after_restore_loss,
            }
        finally:
            # Nested so a failure while closing one cluster cannot skip closing
            # the other before the temporary directory is removed.
            try:
                if restored is not None:
                    restored.close()
            finally:
                memory.close()
|
|
371
|
+
|
|
372
|
+
|
|
241
373
|
def run_multimodal_profile() -> dict[str, object]:
|
|
242
374
|
with tempfile.TemporaryDirectory() as directory:
|
|
243
375
|
memory = WaveMind(
|
|
@@ -325,6 +457,8 @@ def run_benchmark(
|
|
|
325
457
|
),
|
|
326
458
|
run_cache_profile(queries=cache_queries, capacity=cache_capacity),
|
|
327
459
|
run_replication_runtime_profile(),
|
|
460
|
+
run_active_active_delta_profile(),
|
|
461
|
+
run_replicated_snapshot_profile(),
|
|
328
462
|
run_multimodal_profile(),
|
|
329
463
|
]
|
|
330
464
|
return {
|
|
@@ -337,8 +471,9 @@ def run_benchmark(
|
|
|
337
471
|
"description": (
|
|
338
472
|
"Deterministic scale-readiness profile for cluster placement, "
|
|
339
473
|
"node/zone loss simulation, quorum-replicated runtime behavior, "
|
|
340
|
-
"
|
|
341
|
-
"
|
|
474
|
+
"active-active delta sync, replicated snapshot/restore, hot-cache "
|
|
475
|
+
"behavior, and structured payload retrieval. This is not a "
|
|
476
|
+
"10M-vector database load test."
|
|
342
477
|
),
|
|
343
478
|
},
|
|
344
479
|
"results": results,
|
|
@@ -379,6 +514,12 @@ def main() -> int:
|
|
|
379
514
|
print(f"| replicated runtime | recalled_after_node_loss | {result['recalled_after_node_loss']} |")
|
|
380
515
|
print(f"| replicated runtime | repair_copied_records | {result['repair_copied_records']} |")
|
|
381
516
|
print(f"| replicated runtime | tombstone_repair_deleted_records | {result['tombstone_repair_deleted_records']} |")
|
|
517
|
+
elif result["engine"] == "WaveMind active-active delta sync":
|
|
518
|
+
print(f"| active-active delta | converged | {result['converged_after_bidirectional_sync']} |")
|
|
519
|
+
print(f"| active-active delta | tombstone_converged | {result['tombstone_converged']} |")
|
|
520
|
+
elif result["engine"] == "WaveMind replicated snapshot":
|
|
521
|
+
print(f"| replicated snapshot | manifest_healthy | {result['manifest_healthy']} |")
|
|
522
|
+
print(f"| replicated snapshot | recalled_after_restore_node_loss | {result['recalled_after_restore_node_loss']} |")
|
|
382
523
|
else:
|
|
383
524
|
print(f"| structured payloads | precision@1 | {result['precision_at_1']:.3f} |")
|
|
384
525
|
print(f"\nWrote {args.output}")
|