wavemind 2.2.2__tar.gz → 2.2.3__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (161)
  1. {wavemind-2.2.2 → wavemind-2.2.3}/PKG-INFO +39 -16
  2. {wavemind-2.2.2 → wavemind-2.2.3}/README.md +35 -15
  3. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/BENCHMARK_LEADERBOARD.md +4 -3
  4. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/BENCHMARK_REPORT.md +4 -3
  5. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/ann_index_curve_benchmark.py +79 -16
  6. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/benchmark_matrix_results.json +79 -16
  7. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/benchmark_registry.py +53 -10
  8. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/production_load_benchmark.py +6 -3
  9. wavemind-2.2.3/benchmarks/production_load_qdrant_100k_tuned_results.json +75 -0
  10. wavemind-2.2.3/benchmarks/production_load_qdrant_1m_ef_sweep_results.json +79 -0
  11. wavemind-2.2.3/benchmarks/production_load_qdrant_1m_tuned_results.json +75 -0
  12. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/scale_readiness_benchmark.py +8 -2
  13. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/scale_readiness_results.json +12 -9
  14. {wavemind-2.2.2 → wavemind-2.2.3}/docker-compose.yml +1 -1
  15. {wavemind-2.2.2 → wavemind-2.2.3}/docs/BENCHMARK_BRIEF.md +27 -17
  16. {wavemind-2.2.2 → wavemind-2.2.3}/docs/LAUNCH_KIT.md +13 -5
  17. {wavemind-2.2.2 → wavemind-2.2.3}/docs/ROADMAP.md +6 -5
  18. {wavemind-2.2.2 → wavemind-2.2.3}/pyproject.toml +5 -1
  19. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_cli_smoke.py +27 -0
  20. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_cluster.py +25 -0
  21. wavemind-2.2.3/tests/test_jobs.py +126 -0
  22. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_scale_readiness_benchmark.py +2 -0
  23. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/__init__.py +3 -2
  24. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/cli.py +22 -0
  25. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/cluster.py +76 -2
  26. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/jobs.py +158 -1
  27. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/PKG-INFO +39 -16
  28. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/SOURCES.txt +3 -0
  29. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/requires.txt +4 -0
  30. wavemind-2.2.2/tests/test_jobs.py +0 -56
  31. {wavemind-2.2.2 → wavemind-2.2.3}/CONTRIBUTING.md +0 -0
  32. {wavemind-2.2.2 → wavemind-2.2.3}/Dockerfile +0 -0
  33. {wavemind-2.2.2 → wavemind-2.2.3}/LICENSE +0 -0
  34. {wavemind-2.2.2 → wavemind-2.2.3}/MANIFEST.in +0 -0
  35. {wavemind-2.2.2 → wavemind-2.2.3}/SECURITY.md +0 -0
  36. {wavemind-2.2.2 → wavemind-2.2.3}/SUPPORT.md +0 -0
  37. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/agent_memory_benchmark.py +0 -0
  38. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/agent_memory_results.json +0 -0
  39. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/ann_index_curve_results.json +0 -0
  40. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/dynamic_memory_benchmark.py +0 -0
  41. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/dynamic_memory_results.json +0 -0
  42. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/field_memory_dynamics_benchmark.py +0 -0
  43. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/field_memory_dynamics_results.json +0 -0
  44. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/locomo_evidence_results.json +0 -0
  45. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/locomo_memory_benchmark.py +0 -0
  46. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/locomo_sentence_evidence_results.json +0 -0
  47. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/long_memory_evidence_benchmark.py +0 -0
  48. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/long_memory_evidence_results.json +0 -0
  49. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_answer_benchmark.py +0 -0
  50. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_answer_extractive_20_results.json +0 -0
  51. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_answer_qwen25_0_5b_50_results.json +0 -0
  52. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_answer_qwen25_1_5b_50_results.json +0 -0
  53. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_evidence_50_results.json +0 -0
  54. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_evidence_results.json +0 -0
  55. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/longmemeval_memory_benchmark.py +0 -0
  56. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/memory_competitor_benchmark.py +0 -0
  57. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/memory_competitor_results.json +0 -0
  58. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/nomiracl_russian_benchmark.py +0 -0
  59. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/nomiracl_russian_results.json +0 -0
  60. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/open_retrieval_benchmark.py +0 -0
  61. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/open_retrieval_scifact_results.json +0 -0
  62. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/production_index_profile_results.json +0 -0
  63. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/production_load_qdrant_1m_results.json +0 -0
  64. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/production_load_results.json +0 -0
  65. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/render_benchmark_charts.py +0 -0
  66. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/render_benchmark_leaderboard.py +0 -0
  67. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/render_benchmark_report.py +0 -0
  68. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/ru_sentences_benchmark.py +0 -0
  69. {wavemind-2.2.2 → wavemind-2.2.3}/benchmarks/wavemind_capacity_results.json +0 -0
  70. {wavemind-2.2.2 → wavemind-2.2.3}/docs/CHROMA_MIGRATION.md +0 -0
  71. {wavemind-2.2.2 → wavemind-2.2.3}/docs/DEMO_SCRIPT.md +0 -0
  72. {wavemind-2.2.2 → wavemind-2.2.3}/docs/OBSERVABILITY.md +0 -0
  73. {wavemind-2.2.2 → wavemind-2.2.3}/docs/PROJECT_BOARD.md +0 -0
  74. {wavemind-2.2.2 → wavemind-2.2.3}/docs/RELEASE.md +0 -0
  75. {wavemind-2.2.2 → wavemind-2.2.3}/docs/RU_LAUNCH_POSTS.md +0 -0
  76. {wavemind-2.2.2 → wavemind-2.2.3}/docs/USE_CASES.md +0 -0
  77. {wavemind-2.2.2 → wavemind-2.2.3}/docs/assets/benchmark-summary.svg +0 -0
  78. {wavemind-2.2.2 → wavemind-2.2.3}/docs/assets/wavemind-demo.gif +0 -0
  79. {wavemind-2.2.2 → wavemind-2.2.3}/docs/assets/wavemind-social-card.svg +0 -0
  80. {wavemind-2.2.2 → wavemind-2.2.3}/examples/agent_with_memory.py +0 -0
  81. {wavemind-2.2.2 → wavemind-2.2.3}/examples/chroma_migration.py +0 -0
  82. {wavemind-2.2.2 → wavemind-2.2.3}/examples/customer_support_memory.py +0 -0
  83. {wavemind-2.2.2 → wavemind-2.2.3}/examples/demo.py +0 -0
  84. {wavemind-2.2.2 → wavemind-2.2.3}/examples/dynamic_memory_demo.py +0 -0
  85. {wavemind-2.2.2 → wavemind-2.2.3}/examples/framework_integrations.py +0 -0
  86. {wavemind-2.2.2 → wavemind-2.2.3}/examples/langchain_memory.py +0 -0
  87. {wavemind-2.2.2 → wavemind-2.2.3}/examples/llamaindex_retriever.py +0 -0
  88. {wavemind-2.2.2 → wavemind-2.2.3}/examples/observability/README.md +0 -0
  89. {wavemind-2.2.2 → wavemind-2.2.3}/examples/observability/docker-compose.yml +0 -0
  90. {wavemind-2.2.2 → wavemind-2.2.3}/examples/observability/otel-collector.yaml +0 -0
  91. {wavemind-2.2.2 → wavemind-2.2.3}/examples/observability/prometheus-alerts.yml +0 -0
  92. {wavemind-2.2.2 → wavemind-2.2.3}/examples/observability/prometheus.yml +0 -0
  93. {wavemind-2.2.2 → wavemind-2.2.3}/examples/production-index-profile/README.md +0 -0
  94. {wavemind-2.2.2 → wavemind-2.2.3}/examples/production-index-profile/docker-compose.yml +0 -0
  95. {wavemind-2.2.2 → wavemind-2.2.3}/examples/research_notebook_memory.py +0 -0
  96. {wavemind-2.2.2 → wavemind-2.2.3}/examples/sharded_memory.py +0 -0
  97. {wavemind-2.2.2 → wavemind-2.2.3}/install.bat +0 -0
  98. {wavemind-2.2.2 → wavemind-2.2.3}/install.sh +0 -0
  99. {wavemind-2.2.2 → wavemind-2.2.3}/requirements-optional.txt +0 -0
  100. {wavemind-2.2.2 → wavemind-2.2.3}/requirements.txt +0 -0
  101. {wavemind-2.2.2 → wavemind-2.2.3}/setup.cfg +0 -0
  102. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_agent_memory_benchmark.py +0 -0
  103. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_ann_index_curve_benchmark.py +0 -0
  104. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_api.py +0 -0
  105. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_api_process_persistence.py +0 -0
  106. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_benchmark_brief.py +0 -0
  107. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_benchmark_charts.py +0 -0
  108. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_benchmark_leaderboard.py +0 -0
  109. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_benchmark_registry.py +0 -0
  110. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_benchmark_report.py +0 -0
  111. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_chroma_migration_example.py +0 -0
  112. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_core_persistence.py +0 -0
  113. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_dynamic_memory_benchmark.py +0 -0
  114. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_examples.py +0 -0
  115. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_field_graph.py +0 -0
  116. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_field_graph_integration.py +0 -0
  117. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_field_memory_dynamics_benchmark.py +0 -0
  118. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_framework_adapters.py +0 -0
  119. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_import_benchmark.py +0 -0
  120. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_indexes_encoders.py +0 -0
  121. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_langchain_integration.py +0 -0
  122. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_locomo_memory_benchmark.py +0 -0
  123. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_long_memory_evidence_benchmark.py +0 -0
  124. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_longmemeval_answer_benchmark.py +0 -0
  125. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_longmemeval_memory_benchmark.py +0 -0
  126. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_memory_competitor_benchmark.py +0 -0
  127. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_multimodal.py +0 -0
  128. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_nomiracl_russian_benchmark.py +0 -0
  129. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_observability.py +0 -0
  130. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_observability_docs.py +0 -0
  131. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_open_retrieval_benchmark.py +0 -0
  132. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_packaging_files.py +0 -0
  133. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_postgres_storage.py +0 -0
  134. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_production_index_profile.py +0 -0
  135. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_production_load_benchmark.py +0 -0
  136. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_scale_plan.py +0 -0
  137. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_semantic_and_latency.py +0 -0
  138. {wavemind-2.2.2 → wavemind-2.2.3}/tests/test_sharding.py +0 -0
  139. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/__main__.py +0 -0
  140. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/api.py +0 -0
  141. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/benchmark.py +0 -0
  142. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/core.py +0 -0
  143. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/encoders.py +0 -0
  144. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/field_graph.py +0 -0
  145. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/importers.py +0 -0
  146. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/indexes.py +0 -0
  147. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/__init__.py +0 -0
  148. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/autogen.py +0 -0
  149. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/crewai.py +0 -0
  150. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/langchain.py +0 -0
  151. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/langgraph.py +0 -0
  152. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/integrations/llamaindex.py +0 -0
  153. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/multimodal.py +0 -0
  154. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/observability.py +0 -0
  155. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/scale.py +0 -0
  156. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/sharding.py +0 -0
  157. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/storage.py +0 -0
  158. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind/studio.py +0 -0
  159. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/dependency_links.txt +0 -0
  160. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/entry_points.txt +0 -0
  161. {wavemind-2.2.2 → wavemind-2.2.3}/wavemind.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wavemind
3
- Version: 2.2.2
3
+ Version: 2.2.3
4
4
  Summary: Local-first dynamic memory field with vector search and wave-field re-ranking
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/CaspianG/wavemind
@@ -23,6 +23,8 @@ Requires-Dist: faiss-cpu>=1.8; platform_system != "Windows" and extra == "indexe
23
23
  Requires-Dist: qdrant-client>=1.9; extra == "indexes"
24
24
  Provides-Extra: postgres
25
25
  Requires-Dist: psycopg[binary]>=3.1; extra == "postgres"
26
+ Provides-Extra: redis
27
+ Requires-Dist: redis>=5.0; extra == "redis"
26
28
  Provides-Extra: otel
27
29
  Requires-Dist: opentelemetry-api>=1.25; extra == "otel"
28
30
  Requires-Dist: opentelemetry-sdk>=1.25; extra == "otel"
@@ -38,6 +40,7 @@ Requires-Dist: annoy>=1.17; extra == "production"
38
40
  Requires-Dist: faiss-cpu>=1.8; platform_system != "Windows" and extra == "production"
39
41
  Requires-Dist: qdrant-client>=1.9; extra == "production"
40
42
  Requires-Dist: psycopg[binary]>=3.1; extra == "production"
43
+ Requires-Dist: redis>=5.0; extra == "production"
41
44
  Requires-Dist: opentelemetry-api>=1.25; extra == "production"
42
45
  Requires-Dist: opentelemetry-sdk>=1.25; extra == "production"
43
46
  Requires-Dist: opentelemetry-exporter-otlp>=1.25; extra == "production"
@@ -538,7 +541,7 @@ Checked-in result:
538
541
 
539
542
  | profile | result |
540
543
  |---|---:|
541
- | Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, single-node loss availability `1.000`. |
544
+ | Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, write quorum `2`. |
542
545
  | Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.01 ms`. |
543
546
  | Structured payloads | image/audio/table/event retrieval, precision@1 `1.000`, p99 `1.27 ms`. |
544
547
 
@@ -567,6 +570,24 @@ SQLite writes.
567
570
 
568
571
  The same planner is available over HTTP as `POST /cluster-plan`.
569
572
 
573
+ Maintenance worker:
574
+
575
+ ```sh
576
+ wavemind maintenance --namespace user:42 --consolidate-steps 10 --consolidate-concepts --json
577
+ ```
578
+
579
+ This runs one deterministic maintenance pass: expired-memory purge, optional
580
+ field/concept consolidation, and index-health repair. Production deployments can
581
+ call the same command from cron, systemd, Kubernetes CronJobs, Celery, RQ, or
582
+ Temporal.
583
+
584
+ Hot-cache options:
585
+
586
+ | cache | use case |
587
+ |---|---|
588
+ | `HotMemoryCache` | in-process local API/server cache. |
589
+ | `RedisHotMemoryCache` | shared cache for multiple API workers. Install with `pip install "wavemind[redis]"`. |
590
+
570
591
  ## Structured And Multimodal Memory
571
592
 
572
593
  WaveMind can store non-text memories as structured text plus metadata. This is
@@ -1017,8 +1038,8 @@ Current read:
1017
1038
  | LongMemEval full retrieval | On the official LongMemEval-S cleaned file, 470 non-abstention session-level questions, WaveMind reaches `evidence_recall@5 0.782` and `precision@1 0.696`; Chroma static reaches `0.518` / `0.355`; Qdrant static reaches `0.520` / `0.355`. | This is now the strongest public memory result in the repo. It is retrieval-only, not final answer quality. |
1018
1039
  | LongMemEval 50-query smoke | On the first 50 non-abstention LongMemEval-S questions, WaveMind reaches `evidence_recall@5 0.920`, `precision@1 0.760`, and `MRR@5 0.827`; Chroma/Qdrant static reach `0.600`, `0.260`, and `0.385`. | This is the fast regression profile for checking current changes before rerunning the full LongMemEval profile. WaveMind wins on quality; latency still needs work. |
1019
1040
  | ANN/index curve | At 50000 generated 128-d vectors, NumPy exact keeps `recall@10 1.000` at `6.49 ms`; quantized int8 keeps `0.934` at `24.92 ms`; Annoy is faster at `4.92 ms` but drops to `0.730` recall; Qdrant local keeps `1.000` recall at `43.49 ms`. | Current local scale boundary is clear: quantized search needs kernel work, Annoy needs tuning/FAISS, and Qdrant should be tested in service mode for a fair production comparison. |
1020
- | Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.76 ms`; pgvector HNSW reaches `0.736`, avg `17.76 ms`; at 1M vectors Qdrant reaches `0.506`, avg `45.81 ms`. | Qdrant service is already usable at 100k. The 1M result is not production-grade yet: large-N service settings need tuning before claiming million-memory recall. |
1021
- | Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, single-node-loss availability `1.000`, hot-cache hit rate `0.920`, and structured payload precision@1 `1.000`. | This proves routing/cache/payload foundations, not a 10M-vector load-test claim. Real 100k-10M production latency needs service-backed load tests. |
1041
+ | Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.28 ms`, p99 `21.26 ms`. At 1M, tuned Qdrant reaches `recall@10 0.984`, avg `116.80 ms`, p99 `209.28 ms`; an EF sweep finds `recall@10 0.977`, avg `64.76 ms`, p99 `103.77 ms` at `hnsw_ef=2048` on 30 queries. | 100k is production-grade on the tested machine. 1M recall is now strong, but p99 still needs tuning before claiming a stable sub-100 ms SLO. |
1042
+ | Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, and structured payload precision@1 `1.000`. | This proves the routing/cache/payload foundations; it is neither a 10M-vector load-test claim nor evidence of Raft replication. Real 10M latency still needs service-backed load tests on larger hardware. |
1022
1043
  | Memory competitor adapters | WaveMind reaches `precision@1 0.80`, `precision@3 1.00`, stale suppression `1.00` on the small adapter profile. Mem0, Zep, and LangGraph are listed as skipped unless their real packages/services are configured. | This prevents fake competitor claims. The adapter harness is ready; real Mem0/Zep/LangGraph results still need configured installs. |
1023
1044
  | LongMemEval local answer generation | With the same local Ollama `qwen2.5:1.5b`, WaveMind reaches `exact_match 0.240`, `contains_answer 0.380`, `token_f1 0.333`, and `evidence_recall@5 0.920`; Chroma and Qdrant static both reach `0.120`, `0.160`, `0.170`, and `0.600`. | This is the first checked-in end-to-end answer benchmark against Chroma/Qdrant. It is still a 50-question lightweight smoke run, not a full LongMemEval leaderboard score. |
1024
1045
 
@@ -1035,8 +1056,9 @@ Current read:
1035
1056
  | NoMIRACL Russian retrieval | Russian human-annotated multilingual relevance over compact candidate passages. | implemented | WaveMind / Chroma / Qdrant | Keep same-embedding `nDCG@10` at parity, then rerun with sentence-transformers and full MIRACL Russian when disk/service capacity allows it. |
1036
1057
  | ANN/VectorDBBench-style local curve | Recall/latency tradeoff for candidate indexes on generated vectors. | implemented | NumPy exact / quantized int8 / Annoy / Qdrant local | Use this as the local engineering curve; official VectorDBBench remains future work. |
1037
1058
  | Production index profile | Docker-backed 50000-vector profile for persisted FAISS, Qdrant service, and PostgreSQL/pgvector HNSW. | implemented | FAISS / Qdrant service / pgvector | Keep service-mode candidate generation above `0.95` recall@10 and below 10 ms average query latency at 50000 vectors. |
1038
- | Production load profile | 100k and 1M service-backed candidate-index checks. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | 100k Qdrant is strong; 1M Qdrant and pgvector require tuning before production claims. |
1039
- | Scale readiness profile | Cluster placement, single-node-loss simulation, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Use this as production foundation proof before real distributed 100k, 1M, and 10M load tests. |
1059
+ | Production load profile | 100k and 1M service-backed candidate-index checks with p95/p99 latency. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | Keep 100k at recall@10 `1.000`; push 1M p99 below 100 ms with recall@10 >= 0.95. |
1060
+ | Qdrant 1M HNSW ef sweep | One 1M Qdrant collection queried with multiple `hnsw_ef` values. | implemented | Qdrant service | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable 1M SLO. |
1061
+ | Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Use this as production foundation proof before real distributed 10M load tests. |
1040
1062
  | Memory competitor adapter profile | Dynamic-memory scenario wired for external memory frameworks. | implemented | Mem0 / Zep / LangGraph persistent memory | Report real competitor results only when their packages/services are explicitly configured. |
1041
1063
  | [BEIR](https://github.com/beir-cellar/beir) | Standard zero-shot information retrieval quality. | planned | Chroma / Qdrant / FAISS | Stay within 0.02 `nDCG@10` on identical embeddings. |
1042
1064
  | [MTEB Retrieval](https://github.com/embeddings-benchmark/mteb) | Separates encoder quality from retrieval-store quality. | planned | Chroma / Qdrant / FAISS | Prove WaveMind does not reduce same-embedding retrieval quality. |
@@ -1307,15 +1329,16 @@ Checked-in production load points:
1307
1329
 
1308
1330
  ```sh
1309
1331
  python benchmarks/production_load_benchmark.py --sizes 100000 --dim 128 --queries 100 --top-k 10 --engines qdrant-service pgvector faiss-persisted
1310
- python benchmarks/production_load_benchmark.py --sizes 1000000 --dim 128 --queries 50 --top-k 10 --engines qdrant-service --output benchmarks/production_load_qdrant_1m_results.json
1332
+ python benchmarks/production_load_benchmark.py --sizes 1000000 --dim 128 --queries 50 --top-k 10 --engines qdrant-service --output benchmarks/production_load_qdrant_1m_tuned_results.json
1311
1333
  ```
1312
1334
 
1313
- | vectors | engine | recall@10 | avg latency | p95 latency | build |
1314
- |---:|---|---:|---:|---:|---:|
1315
- | 100000 | Qdrant service | 1.000 | 10.76 ms | 18.78 ms | 39873.2 ms |
1316
- | 100000 | WaveMind pgvector | 0.736 | 17.76 ms | 23.48 ms | 455703.7 ms |
1317
- | 100000 | WaveMind faiss-persisted | skipped | - | - | - |
1318
- | 1000000 | Qdrant service | 0.506 | 45.81 ms | 65.18 ms | 563945.5 ms |
1335
+ | vectors | engine | recall@10 | avg latency | p95 latency | p99 latency | build |
1336
+ |---:|---|---:|---:|---:|---:|---:|
1337
+ | 100000 | Qdrant service | 1.000 | 10.28 ms | 18.97 ms | 21.26 ms | 27439.3 ms |
1338
+ | 100000 | WaveMind pgvector | 0.736 | 17.76 ms | 23.48 ms | - | 455703.7 ms |
1339
+ | 100000 | WaveMind faiss-persisted | skipped | - | - | - | - |
1340
+ | 1000000 | Qdrant service tuned | 0.984 | 116.80 ms | 153.84 ms | 209.28 ms | 450674.6 ms |
1341
+ | 1000000 | Qdrant `hnsw_ef=2048` sweep point | 0.977 | 64.76 ms | 91.18 ms | 103.77 ms | 451912.4 ms |
1319
1342
 
1320
1343
  Read this as an engineering curve, not an official VectorDBBench result. Annoy
1321
1344
  is faster than exact NumPy at 50000 vectors but loses too much recall with the
@@ -1327,9 +1350,9 @@ FAISS persistence and service-mode Qdrant now both preserve exact recall at
1327
1350
  `WAVEMIND_PGVECTOR_EF_SEARCH=400`, which improves recall materially but still
1328
1351
  misses the `0.95` production target and is slower than the other two profiles.
1329
1352
  The 100k load profile shows Qdrant service is already viable for candidate
1330
- generation; the 1M Qdrant profile shows that default service settings are not
1331
- enough for production recall and need HNSW/search tuning before million-memory
1332
- claims.
1353
+ generation on the tested machine. The tuned 1M profile crosses the recall target,
1354
+ and the EF sweep gets close to the p99 latency target, but 1M should still be
1355
+ treated as tuning-in-progress until a 100+ query p99 run stays below 100 ms.
1333
1356
  If a required package, service, or environment variable is missing, the runner
1334
1357
  marks that engine as `skipped` instead of silently falling back to another
1335
1358
  backend.
@@ -488,7 +488,7 @@ Checked-in result:
488
488
 
489
489
  | profile | result |
490
490
  |---|---:|
491
- | Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, single-node loss availability `1.000`. |
491
+ | Cluster planner | 4096 namespaces, 4 nodes, replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, write quorum `2`. |
492
492
  | Hot cache | 2000 lookups, hit rate `0.920`, p99 lookup `0.01 ms`. |
493
493
  | Structured payloads | image/audio/table/event retrieval, precision@1 `1.000`, p99 `1.27 ms`. |
494
494
 
@@ -517,6 +517,24 @@ SQLite writes.
517
517
 
518
518
  The same planner is available over HTTP as `POST /cluster-plan`.
519
519
 
520
+ Maintenance worker:
521
+
522
+ ```sh
523
+ wavemind maintenance --namespace user:42 --consolidate-steps 10 --consolidate-concepts --json
524
+ ```
525
+
526
+ This runs one deterministic maintenance pass: expired-memory purge, optional
527
+ field/concept consolidation, and index-health repair. Production deployments can
528
+ call the same command from cron, systemd, Kubernetes CronJobs, Celery, RQ, or
529
+ Temporal.
530
+
531
+ Hot-cache options:
532
+
533
+ | cache | use case |
534
+ |---|---|
535
+ | `HotMemoryCache` | in-process local API/server cache. |
536
+ | `RedisHotMemoryCache` | shared cache for multiple API workers. Install with `pip install "wavemind[redis]"`. |
537
+
520
538
  ## Structured And Multimodal Memory
521
539
 
522
540
  WaveMind can store non-text memories as structured text plus metadata. This is
@@ -967,8 +985,8 @@ Current read:
967
985
  | LongMemEval full retrieval | On the official LongMemEval-S cleaned file, 470 non-abstention session-level questions, WaveMind reaches `evidence_recall@5 0.782` and `precision@1 0.696`; Chroma static reaches `0.518` / `0.355`; Qdrant static reaches `0.520` / `0.355`. | This is now the strongest public memory result in the repo. It is retrieval-only, not final answer quality. |
968
986
  | LongMemEval 50-query smoke | On the first 50 non-abstention LongMemEval-S questions, WaveMind reaches `evidence_recall@5 0.920`, `precision@1 0.760`, and `MRR@5 0.827`; Chroma/Qdrant static reach `0.600`, `0.260`, and `0.385`. | This is the fast regression profile for checking current changes before rerunning the full LongMemEval profile. WaveMind wins on quality; latency still needs work. |
969
987
  | ANN/index curve | At 50000 generated 128-d vectors, NumPy exact keeps `recall@10 1.000` at `6.49 ms`; quantized int8 keeps `0.934` at `24.92 ms`; Annoy is faster at `4.92 ms` but drops to `0.730` recall; Qdrant local keeps `1.000` recall at `43.49 ms`. | Current local scale boundary is clear: quantized search needs kernel work, Annoy needs tuning/FAISS, and Qdrant should be tested in service mode for a fair production comparison. |
970
- | Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.76 ms`; pgvector HNSW reaches `0.736`, avg `17.76 ms`; at 1M vectors Qdrant reaches `0.506`, avg `45.81 ms`. | Qdrant service is already usable at 100k. The 1M result is not production-grade yet: large-N service settings need tuning before claiming million-memory recall. |
971
- | Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, single-node-loss availability `1.000`, hot-cache hit rate `0.920`, and structured payload precision@1 `1.000`. | This proves routing/cache/payload foundations, not a 10M-vector load-test claim. Real 100k-10M production latency needs service-backed load tests. |
988
+ | Production load | At 100000 generated 128-d vectors, service-mode Qdrant reaches `recall@10 1.000`, avg `10.28 ms`, p99 `21.26 ms`. At 1M, tuned Qdrant reaches `recall@10 0.984`, avg `116.80 ms`, p99 `209.28 ms`; an EF sweep finds `recall@10 0.977`, avg `64.76 ms`, p99 `103.77 ms` at `hnsw_ef=2048` on 30 queries. | 100k is production-grade on the tested machine. 1M recall is now strong, but p99 still needs tuning before claiming a stable sub-100 ms SLO. |
989
+ | Scale readiness | Deterministic 1M-memory simulation validates 4096 namespace placements over 4 nodes with replication factor 2, node-loss availability `1.000`, zone-loss availability `1.000`, hot-cache hit rate `0.920`, and structured payload precision@1 `1.000`. | This proves the routing/cache/payload foundations; it is neither a 10M-vector load-test claim nor evidence of Raft replication. Real 10M latency still needs service-backed load tests on larger hardware. |
972
990
  | Memory competitor adapters | WaveMind reaches `precision@1 0.80`, `precision@3 1.00`, stale suppression `1.00` on the small adapter profile. Mem0, Zep, and LangGraph are listed as skipped unless their real packages/services are configured. | This prevents fake competitor claims. The adapter harness is ready; real Mem0/Zep/LangGraph results still need configured installs. |
973
991
  | LongMemEval local answer generation | With the same local Ollama `qwen2.5:1.5b`, WaveMind reaches `exact_match 0.240`, `contains_answer 0.380`, `token_f1 0.333`, and `evidence_recall@5 0.920`; Chroma and Qdrant static both reach `0.120`, `0.160`, `0.170`, and `0.600`. | This is the first checked-in end-to-end answer benchmark against Chroma/Qdrant. It is still a 50-question lightweight smoke run, not a full LongMemEval leaderboard score. |
974
992
 
@@ -985,8 +1003,9 @@ Current read:
985
1003
  | NoMIRACL Russian retrieval | Russian human-annotated multilingual relevance over compact candidate passages. | implemented | WaveMind / Chroma / Qdrant | Keep same-embedding `nDCG@10` at parity, then rerun with sentence-transformers and full MIRACL Russian when disk/service capacity allows it. |
986
1004
  | ANN/VectorDBBench-style local curve | Recall/latency tradeoff for candidate indexes on generated vectors. | implemented | NumPy exact / quantized int8 / Annoy / Qdrant local | Use this as the local engineering curve; official VectorDBBench remains future work. |
987
1005
  | Production index profile | Docker-backed 50000-vector profile for persisted FAISS, Qdrant service, and PostgreSQL/pgvector HNSW. | implemented | FAISS / Qdrant service / pgvector | Keep service-mode candidate generation above `0.95` recall@10 and below 10 ms average query latency at 50000 vectors. |
988
- | Production load profile | 100k and 1M service-backed candidate-index checks. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | 100k Qdrant is strong; 1M Qdrant and pgvector require tuning before production claims. |
989
- | Scale readiness profile | Cluster placement, single-node-loss simulation, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Use this as production foundation proof before real distributed 100k, 1M, and 10M load tests. |
1006
+ | Production load profile | 100k and 1M service-backed candidate-index checks with p95/p99 latency. | implemented | Qdrant service / pgvector HNSW / FAISS persisted | Keep 100k at recall@10 `1.000`; push 1M p99 below 100 ms with recall@10 >= 0.95. |
1007
+ | Qdrant 1M HNSW ef sweep | One 1M Qdrant collection queried with multiple `hnsw_ef` values. | implemented | Qdrant service | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable 1M SLO. |
1008
+ | Scale readiness profile | Cluster placement, node/zone-loss simulation, quorum report, hot-cache behavior, and structured/multimodal payload retrieval. | implemented | Mem0 / Zep / LangGraph persistent memory / GraphRAG target adapters | Use this as production foundation proof before real distributed 10M load tests. |
990
1009
  | Memory competitor adapter profile | Dynamic-memory scenario wired for external memory frameworks. | implemented | Mem0 / Zep / LangGraph persistent memory | Report real competitor results only when their packages/services are explicitly configured. |
991
1010
  | [BEIR](https://github.com/beir-cellar/beir) | Standard zero-shot information retrieval quality. | planned | Chroma / Qdrant / FAISS | Stay within 0.02 `nDCG@10` on identical embeddings. |
992
1011
  | [MTEB Retrieval](https://github.com/embeddings-benchmark/mteb) | Separates encoder quality from retrieval-store quality. | planned | Chroma / Qdrant / FAISS | Prove WaveMind does not reduce same-embedding retrieval quality. |
@@ -1257,15 +1276,16 @@ Checked-in production load points:
1257
1276
 
1258
1277
  ```sh
1259
1278
  python benchmarks/production_load_benchmark.py --sizes 100000 --dim 128 --queries 100 --top-k 10 --engines qdrant-service pgvector faiss-persisted
1260
- python benchmarks/production_load_benchmark.py --sizes 1000000 --dim 128 --queries 50 --top-k 10 --engines qdrant-service --output benchmarks/production_load_qdrant_1m_results.json
1279
+ python benchmarks/production_load_benchmark.py --sizes 1000000 --dim 128 --queries 50 --top-k 10 --engines qdrant-service --output benchmarks/production_load_qdrant_1m_tuned_results.json
1261
1280
  ```
1262
1281
 
1263
- | vectors | engine | recall@10 | avg latency | p95 latency | build |
1264
- |---:|---|---:|---:|---:|---:|
1265
- | 100000 | Qdrant service | 1.000 | 10.76 ms | 18.78 ms | 39873.2 ms |
1266
- | 100000 | WaveMind pgvector | 0.736 | 17.76 ms | 23.48 ms | 455703.7 ms |
1267
- | 100000 | WaveMind faiss-persisted | skipped | - | - | - |
1268
- | 1000000 | Qdrant service | 0.506 | 45.81 ms | 65.18 ms | 563945.5 ms |
1282
+ | vectors | engine | recall@10 | avg latency | p95 latency | p99 latency | build |
1283
+ |---:|---|---:|---:|---:|---:|---:|
1284
+ | 100000 | Qdrant service | 1.000 | 10.28 ms | 18.97 ms | 21.26 ms | 27439.3 ms |
1285
+ | 100000 | WaveMind pgvector | 0.736 | 17.76 ms | 23.48 ms | - | 455703.7 ms |
1286
+ | 100000 | WaveMind faiss-persisted | skipped | - | - | - | - |
1287
+ | 1000000 | Qdrant service tuned | 0.984 | 116.80 ms | 153.84 ms | 209.28 ms | 450674.6 ms |
1288
+ | 1000000 | Qdrant `hnsw_ef=2048` sweep point | 0.977 | 64.76 ms | 91.18 ms | 103.77 ms | 451912.4 ms |
1269
1289
 
1270
1290
  Read this as an engineering curve, not an official VectorDBBench result. Annoy
1271
1291
  is faster than exact NumPy at 50000 vectors but loses too much recall with the
@@ -1277,9 +1297,9 @@ FAISS persistence and service-mode Qdrant now both preserve exact recall at
1277
1297
  `WAVEMIND_PGVECTOR_EF_SEARCH=400`, which improves recall materially but still
1278
1298
  misses the `0.95` production target and is slower than the other two profiles.
1279
1299
  The 100k load profile shows Qdrant service is already viable for candidate
1280
- generation; the 1M Qdrant profile shows that default service settings are not
1281
- enough for production recall and need HNSW/search tuning before million-memory
1282
- claims.
1300
+ generation on the tested machine. The tuned 1M profile crosses the recall target,
1301
+ and the EF sweep gets close to the p99 latency target, but 1M should still be
1302
+ treated as tuning-in-progress until a 100+ query p99 run stays below 100 ms.
1283
1303
  If a required package, service, or environment variable is missing, the runner
1284
1304
  marks that engine as `skipped` instead of silently falling back to another
1285
1305
  backend.
@@ -18,9 +18,10 @@ This is a compact reader-facing view of checked-in benchmark results. It is not
18
18
  | [LongMemEval evidence 50-query smoke](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | evidence recall@k | WaveMind: 0.92 / 15.3 ms | Static vector: 0.6 / 0.337 ms | WaveMind leads on quality |
19
19
  | [ANN index latency curve](https://github.com/erikbern/ann-benchmarks) | index-latency | Recall@k | WaveMind numpy: 1 / 6.485 ms | Qdrant local: 1 / 43.5 ms | Quality tie; WaveMind faster |
20
20
  | Production index profile | index-latency | Recall@k | WaveMind faiss-persisted: 1 / 3.524 ms | Qdrant service: 1 / 4.414 ms | Quality tie; WaveMind faster |
21
- | Production load profile 100k | production-scale | Recall@k | WaveMind pgvector: 0.736 / 17.8 ms | Qdrant service: 1 / 10.8 ms | Baseline leads on quality |
22
- | Production load profile 1M | production-scale | Recall@k | - | Qdrant service: 0.506 / 45.8 ms | No WaveMind result |
23
- | Scale readiness profile | production-scale | precision@1 | WaveMind structured payloads: 1 / 0.791 ms | - | WaveMind-only check |
21
+ | Production load profile 100k | production-scale | Recall@k | WaveMind pgvector: 0.736 / 17.8 ms | Qdrant service: 1 / 10.3 ms | Baseline leads on quality |
22
+ | Production load profile 1M | production-scale | Recall@k | - | Qdrant service: 0.984 / 116.8 ms | No WaveMind result |
23
+ | Qdrant 1M HNSW ef sweep | production-scale | Recall@k | - | hnsw_ef=2048: 0.977 / 64.8 ms | No WaveMind result |
24
+ | Scale readiness profile | production-scale | precision@1 | WaveMind structured payloads: 1 / 0.837 ms | - | WaveMind-only check |
24
25
  | Memory competitor adapter profile | agent-memory | precision@1 | WaveMind: 0.8 / 0.554 ms | - | WaveMind-only check |
25
26
  | [LongMemEval answer generation](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | token F1 | WaveMind + qwen2.5:1.5b: 0.333 / - | Chroma static + qwen2.5:1.5b: 0.17 / - | WaveMind leads on quality |
26
27
 
@@ -21,9 +21,10 @@ Planned rows are not claimed wins. They are the public proof path WaveMind must
21
21
  | [LongMemEval evidence 50-query smoke](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | implemented | WaveMind: evidence recall@k 0.92, precision@1 0.76, MRR@k 0.83, context saved 0.87, avg latency 15.3, p95 latency 42.9<br>Chroma static: evidence recall@k 0.60, precision@1 0.26, MRR@k 0.39, context saved 0.89, avg latency 13.3, p95 latency 26.0<br>Static vector: evidence recall@k 0.60, precision@1 0.26, MRR@k 0.39, context saved 0.89, avg latency 0.34, p95 latency 2.14<br>Qdrant static: evidence recall@k 0.60, precision@1 0.26, MRR@k 0.39, context saved 0.89, avg latency 180.2, p95 latency 296.7 | Speed up full LongMemEval reruns by reusing per-question candidate indexes or adding a streaming runner mode. |
22
22
  | [ANN index latency curve](https://github.com/erikbern/ann-benchmarks) | index-latency | implemented | WaveMind numpy: Recall@k 1.00, avg latency 6.49, p95 latency 6.41, build ms 744.7<br>WaveMind quantized: Recall@k 0.93, avg latency 24.9, p95 latency 37.4, build ms 2088.7<br>WaveMind annoy: Recall@k 0.73, avg latency 4.92, p95 latency 7.37, build ms 4090.1<br>WaveMind faiss: skipped - Install faiss-cpu to use FaissVectorIndex<br>Qdrant local: Recall@k 1.00, avg latency 43.5, p95 latency 59.7, build ms 17525.7 | Tune quantized search kernels, add FAISS on Linux/macOS CI, and test Qdrant service-mode curves beyond 50000 vectors. |
23
23
  | Production index profile | index-latency | implemented | WaveMind faiss-persisted: Recall@k 1.00, avg latency 3.52, p95 latency 7.88, build ms 715.9<br>Qdrant service: Recall@k 1.00, avg latency 4.41, p95 latency 5.93, build ms 12269.8<br>WaveMind pgvector: Recall@k 0.81, avg latency 10.9, p95 latency 15.7, build ms 185048.9 | Use the dedicated production load profile for 100000 and 1000000-vector service tests, then tune pgvector and Qdrant for recall/latency. |
24
- | Production load profile 100k | production-scale | implemented | Qdrant service: Recall@k 1.00, avg latency 10.8, p95 latency 18.8, build ms 39873.2<br>WaveMind pgvector: Recall@k 0.74, avg latency 17.8, p95 latency 23.5, build ms 455703.7<br>WaveMind faiss-persisted: skipped - Set WAVEMIND_FAISS_PATH to use the persisted FAISS backend | Tune pgvector HNSW build/search parameters and add persisted FAISS from the Linux benchmark container. |
25
- | Production load profile 1M | production-scale | implemented | Qdrant service: Recall@k 0.51, avg latency 45.8, p95 latency 65.2, build ms 563945.5 | Run Qdrant with tuned HNSW/search params, then add FAISS IVF/HNSW and pgvector 1M profiles on a larger disk. |
26
- | Scale readiness profile | production-scale | implemented | WaveMind cluster planner: simulated memories 1000000, namespaces 4096, nodes 4, replication factor 2, node loss min availability 1.00, placement ms 109.3<br>WaveMind hot cache: queries 2000, capacity 512, hit rate 0.92, evictions 0, p99 lookup ms 0.01<br>WaveMind structured payloads: queries 4, precision@1 1.00, avg latency 0.79, p99 latency ms 1.27 | Move from single-node service profiles to namespace sharding and replicated service runs. |
24
+ | Production load profile 100k | production-scale | implemented | Qdrant service: Recall@k 1.00, avg latency 10.3, p95 latency 19.0, p99 latency ms 21.3, build ms 27439.3<br>WaveMind pgvector: Recall@k 0.74, avg latency 17.8, p95 latency 23.5, build ms 455703.7<br>WaveMind faiss-persisted: skipped - Set WAVEMIND_FAISS_PATH to use the persisted FAISS backend | Tune pgvector HNSW build/search parameters and add persisted FAISS from the Linux benchmark container. |
25
+ | Production load profile 1M | production-scale | implemented | Qdrant service: Recall@k 0.98, avg latency 116.8, p95 latency 153.8, p99 latency ms 209.3, build ms 450674.6 | Tune Qdrant indexing/search params further, then add FAISS IVF/HNSW and pgvector 1M profiles on a larger disk. |
26
+ | Qdrant 1M HNSW ef sweep | production-scale | implemented | hnsw_ef=512: Recall@k 0.75, avg latency 47.2, p95 latency 68.5, p99 latency ms 68.5, max latency ms 68.5<br>hnsw_ef=768: Recall@k 0.85, avg latency 44.0, p95 latency 69.1, p99 latency ms 69.8, max latency ms 69.8<br>hnsw_ef=1024: Recall@k 0.88, avg latency 62.9, p95 latency 81.1, p99 latency ms 85.5, max latency ms 85.5<br>hnsw_ef=1536: Recall@k 0.94, avg latency 65.6, p95 latency 111.2, p99 latency ms 119.7, max latency ms 119.7<br>hnsw_ef=2048: Recall@k 0.98, avg latency 64.8, p95 latency 91.2, p99 latency ms 103.8, max latency ms 103.8 | Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable production SLO. |
27
+ | Scale readiness profile | production-scale | implemented | WaveMind cluster planner: simulated memories 1000000, namespaces 4096, nodes 4, replication factor 2, node loss min availability 1.00, zone loss min availability 1.00, read quorum 1, write quorum 2, placement ms 115.8<br>WaveMind hot cache: queries 2000, capacity 512, hit rate 0.92, evictions 0, p99 lookup ms 0.00<br>WaveMind structured payloads: queries 4, precision@1 1.00, avg latency 0.84, p99 latency ms 1.07 | Move from single-node service profiles to namespace sharding and replicated service runs. |
27
28
  | Memory competitor adapter profile | agent-memory | implemented | WaveMind: precision@1 0.80, precision@3 1.00, stale suppression 1.00, avg latency 0.55, p95 latency 0.83<br>Mem0: skipped - Install Mem0 to run this adapter profile: pip install "mem0ai"<br>Zep: skipped - Install the Zep client package and set ZEP_API_KEY or ZEP_API_URL.<br>LangGraph persistent memory: skipped - Install LangGraph to run this adapter profile: pip install "langgraph" | Add documented setup commands for each competitor adapter and store checked-in results only when those real adapters run. |
28
29
  | [LongMemEval answer generation](https://github.com/xiaowu0162/LongMemEval) | long-term-agent-memory | implemented | extractive smoke: queries 20, evidence recall@k 1.00, exact match 0.00, contains answer 0.05, token f1 0.02, avg retrieval ms 3.79, avg generation ms 0.77<br>WaveMind + qwen2.5:0.5b: queries 50, evidence recall@k 0.92, exact match 0.12, contains answer 0.18, token f1 0.18, avg retrieval ms 2.98, avg generation ms 1428.2<br>Chroma static + qwen2.5:0.5b: queries 50, evidence recall@k 0.60, exact match 0.10, contains answer 0.12, token f1 0.13, avg retrieval ms 4.10, avg generation ms 1234.7<br>Qdrant static + qwen2.5:0.5b: queries 50, evidence recall@k 0.60, exact match 0.10, contains answer 0.12, token f1 0.13, avg retrieval ms 63.8, avg generation ms 893.5<br>WaveMind + qwen2.5:1.5b: queries 50, evidence recall@k 0.92, exact match 0.24, contains answer 0.38, token f1 0.33, avg retrieval ms 2.00, avg generation ms 2153.0<br>Chroma static + qwen2.5:1.5b: queries 50, evidence recall@k 0.60, exact match 0.12, contains answer 0.16, token f1 0.17, avg retrieval ms 7.05, avg generation ms 2082.4<br>Qdrant static + qwen2.5:1.5b: queries 50, evidence recall@k 0.60, exact match 0.12, contains answer 0.16, token f1 0.17, avg retrieval ms 100.2, avg generation ms 758.1 | Run all 470 non-abstention questions with a stronger local/API model and add faithfulness/abstention scoring. |
29
30
 
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import argparse
4
+ from contextlib import contextmanager
4
5
  import json
5
6
  import os
6
7
  import statistics
@@ -57,11 +58,15 @@ def _recall_at_k(results: list[list[int]], expected: list[set[int]], top_k: int)
57
58
  return statistics.mean(recalls) if recalls else 0.0
58
59
 
59
60
 
60
- def _p95(values: list[float]) -> float:
61
+ def _percentile(values: list[float], pct: float) -> float:
61
62
  if not values:
62
63
  return 0.0
63
64
  ordered = sorted(values)
64
- return ordered[min(len(ordered) - 1, int(len(ordered) * 0.95))]
65
+ index = min(
66
+ len(ordered) - 1,
67
+ max(0, int(round((pct / 100.0) * (len(ordered) - 1)))),
68
+ )
69
+ return ordered[index]
65
70
 
66
71
 
67
72
  def run_wavemind_index(
@@ -87,7 +92,10 @@ def run_wavemind_index(
87
92
  "engine": f"WaveMind {kind}",
88
93
  "recall_at_k": _recall_at_k(ids, expected, top_k),
89
94
  "avg_latency_ms": statistics.mean(latencies) if latencies else 0.0,
90
- "p95_latency_ms": _p95(latencies),
95
+ "p50_latency_ms": statistics.median(latencies) if latencies else 0.0,
96
+ "p95_latency_ms": _percentile(latencies, 95),
97
+ "p99_latency_ms": _percentile(latencies, 99),
98
+ "max_latency_ms": max(latencies) if latencies else 0.0,
91
99
  "build_ms": build_ms,
92
100
  "queries": len(queries),
93
101
  }
@@ -108,21 +116,27 @@ def run_qdrant(
108
116
  url = ":memory:"
109
117
  try:
110
118
  from qdrant_client import QdrantClient
111
- from qdrant_client.models import Distance, PointStruct, VectorParams
119
+ from qdrant_client.models import Distance, PointStruct, SearchParams, VectorParams
112
120
  except ImportError as exc:
113
121
  raise RuntimeError("Install qdrant-client to run the Qdrant ANN curve") from exc
114
- if service:
115
- client = QdrantClient(url=url, api_key=os.environ.get("WAVEMIND_QDRANT_API_KEY"))
116
- engine = "Qdrant service"
117
- else:
118
- client = QdrantClient(url)
119
- engine = "Qdrant local"
122
+ with _local_no_proxy(url):
123
+ if service:
124
+ client = QdrantClient(
125
+ url=url,
126
+ api_key=os.environ.get("WAVEMIND_QDRANT_API_KEY"),
127
+ timeout=float(os.environ.get("WAVEMIND_QDRANT_TIMEOUT", "120")),
128
+ )
129
+ engine = "Qdrant service"
130
+ else:
131
+ client = QdrantClient(url)
132
+ engine = "Qdrant local"
120
133
  collection_name = f"wavemind_ann_curve_{time.time_ns()}"
121
134
  try:
122
135
  started = time.perf_counter()
123
136
  client.recreate_collection(
124
137
  collection_name=collection_name,
125
138
  vectors_config=VectorParams(size=int(vectors.shape[1]), distance=Distance.COSINE),
139
+ timeout=int(os.environ.get("WAVEMIND_QDRANT_COLLECTION_TIMEOUT", "120")),
126
140
  )
127
141
  batch = []
128
142
  for index, vector in enumerate(vectors):
@@ -137,12 +151,26 @@ def run_qdrant(
137
151
  latencies: list[float] = []
138
152
  for query in queries:
139
153
  started = time.perf_counter()
154
+ search_params = None
155
+ hnsw_ef = os.environ.get("WAVEMIND_QDRANT_HNSW_EF")
156
+ exact = os.environ.get("WAVEMIND_QDRANT_EXACT", "").lower() in {
157
+ "1",
158
+ "true",
159
+ "yes",
160
+ "on",
161
+ }
162
+ if hnsw_ef or exact:
163
+ search_params = SearchParams(
164
+ hnsw_ef=int(hnsw_ef) if hnsw_ef else None,
165
+ exact=exact or None,
166
+ )
140
167
  hits = list(
141
168
  client.query_points(
142
169
  collection_name=collection_name,
143
170
  query=query.tolist(),
144
171
  limit=top_k,
145
172
  with_payload=False,
173
+ search_params=search_params,
146
174
  ).points
147
175
  )
148
176
  latencies.append((time.perf_counter() - started) * 1000.0)
@@ -151,9 +179,16 @@ def run_qdrant(
151
179
  "engine": engine,
152
180
  "recall_at_k": _recall_at_k(ids, expected, top_k),
153
181
  "avg_latency_ms": statistics.mean(latencies) if latencies else 0.0,
154
- "p95_latency_ms": _p95(latencies),
182
+ "p50_latency_ms": statistics.median(latencies) if latencies else 0.0,
183
+ "p95_latency_ms": _percentile(latencies, 95),
184
+ "p99_latency_ms": _percentile(latencies, 99),
185
+ "max_latency_ms": max(latencies) if latencies else 0.0,
155
186
  "build_ms": build_ms,
156
187
  "queries": len(queries),
188
+ "search_params": {
189
+ "hnsw_ef": int(hnsw_ef) if hnsw_ef else None,
190
+ "exact": exact,
191
+ },
157
192
  }
158
193
  finally:
159
194
  if os.environ.get("WAVEMIND_QDRANT_KEEP_COLLECTION", "0").lower() not in {
@@ -204,7 +239,7 @@ def run_size(
204
239
  elif key in {"qdrant", "qdrant-local"}:
205
240
  try:
206
241
  results.append(run_qdrant(vectors, queries, expected, top_k, service=False))
207
- except RuntimeError as exc:
242
+ except Exception as exc:
208
243
  results.append(
209
244
  {
210
245
  "engine": "Qdrant local",
@@ -215,7 +250,7 @@ def run_size(
215
250
  elif key == "qdrant-service":
216
251
  try:
217
252
  results.append(run_qdrant(vectors, queries, expected, top_k, service=True))
218
- except RuntimeError as exc:
253
+ except Exception as exc:
219
254
  results.append(
220
255
  {
221
256
  "engine": "Qdrant service",
@@ -235,6 +270,33 @@ def run_size(
235
270
  }
236
271
 
237
272
 
273
+ @contextmanager
274
+ def _local_no_proxy(url: str):
275
+ if not any(host in url for host in ("127.0.0.1", "localhost", "::1")):
276
+ yield
277
+ return
278
+ original_no_proxy = os.environ.get("NO_PROXY")
279
+ original_no_proxy_lower = os.environ.get("no_proxy")
280
+ local_hosts = "127.0.0.1,localhost,::1"
281
+ os.environ["NO_PROXY"] = (
282
+ f"{original_no_proxy},{local_hosts}" if original_no_proxy else local_hosts
283
+ )
284
+ os.environ["no_proxy"] = (
285
+ f"{original_no_proxy_lower},{local_hosts}" if original_no_proxy_lower else local_hosts
286
+ )
287
+ try:
288
+ yield
289
+ finally:
290
+ if original_no_proxy is None:
291
+ os.environ.pop("NO_PROXY", None)
292
+ else:
293
+ os.environ["NO_PROXY"] = original_no_proxy
294
+ if original_no_proxy_lower is None:
295
+ os.environ.pop("no_proxy", None)
296
+ else:
297
+ os.environ["no_proxy"] = original_no_proxy_lower
298
+
299
+
238
300
  def run_benchmark(
239
301
  sizes: Iterable[int],
240
302
  dim: int,
@@ -276,18 +338,19 @@ def run_benchmark(
276
338
 
277
339
  def print_table(payload: dict[str, Any]) -> None:
278
340
  top_k = payload["scenario"]["top_k"]
279
- print(f"| vectors | engine | recall@{top_k} | avg latency | p95 latency | build |")
280
- print("|---:|---|---:|---:|---:|---:|")
341
+ print(f"| vectors | engine | recall@{top_k} | avg latency | p95 latency | p99 latency | build |")
342
+ print("|---:|---|---:|---:|---:|---:|---:|")
281
343
  for size_result in payload["results"]:
282
344
  for result in size_result["results"]:
283
345
  if result.get("skipped"):
284
- print(f"| {size_result['vectors']} | {result['engine']} | skipped | - | - | - |")
346
+ print(f"| {size_result['vectors']} | {result['engine']} | skipped | - | - | - | - |")
285
347
  continue
286
348
  print(
287
349
  f"| {size_result['vectors']} | {result['engine']} | "
288
350
  f"{result['recall_at_k']:.3f} | "
289
351
  f"{result['avg_latency_ms']:.2f} ms | "
290
352
  f"{result['p95_latency_ms']:.2f} ms | "
353
+ f"{result['p99_latency_ms']:.2f} ms | "
291
354
  f"{result['build_ms']:.1f} ms |"
292
355
  )
293
356
 
@@ -625,14 +625,16 @@
625
625
  "recall@10",
626
626
  "avg_latency_ms",
627
627
  "p95_latency_ms",
628
+ "p99_latency_ms",
628
629
  "build_ms"
629
630
  ],
630
631
  "current": {
631
632
  "Qdrant service": {
632
633
  "recall_at_k": 1.0,
633
- "avg_latency_ms": 10.75583399971947,
634
- "p95_latency_ms": 18.775899952743202,
635
- "build_ms": 39873.22780000977
634
+ "avg_latency_ms": 10.275224001379684,
635
+ "p95_latency_ms": 18.97340000141412,
636
+ "p99_latency_ms": 21.25629998045042,
637
+ "build_ms": 27439.262800035067
636
638
  },
637
639
  "WaveMind pgvector": {
638
640
  "recall_at_k": 0.736,
@@ -645,7 +647,7 @@
645
647
  "reason": "Set WAVEMIND_FAISS_PATH to use the persisted FAISS backend"
646
648
  }
647
649
  },
648
- "target": "Reach recall@10 >= 0.95 and avg latency < 20 ms on at least one production service backend at 100000 memories.",
650
+ "target": "Reach recall@10 >= 0.95 and p99 latency < 100 ms on at least one production service backend at 100000 memories.",
649
651
  "next_step": "Tune pgvector HNSW build/search parameters and add persisted FAISS from the Linux benchmark container."
650
652
  },
651
653
  {
@@ -653,8 +655,8 @@
653
655
  "name": "Production load profile 1M",
654
656
  "category": "production-scale",
655
657
  "status": "implemented",
656
- "source": "benchmarks/production_load_qdrant_1m_results.json",
657
- "dataset": "1000000 generated normalized 128-d vectors; Qdrant service-only recall@10/latency profile with 50 queries.",
658
+ "source": "benchmarks/production_load_qdrant_1m_tuned_results.json",
659
+ "dataset": "1000000 generated normalized 128-d vectors; Qdrant service-only recall@10/latency profile with tuned HNSW search.",
658
660
  "competitors": [
659
661
  "Qdrant service"
660
662
  ],
@@ -662,18 +664,76 @@
662
664
  "recall@10",
663
665
  "avg_latency_ms",
664
666
  "p95_latency_ms",
667
+ "p99_latency_ms",
665
668
  "build_ms"
666
669
  ],
667
670
  "current": {
668
671
  "Qdrant service": {
669
- "recall_at_k": 0.506,
670
- "avg_latency_ms": 45.80613199970685,
671
- "p95_latency_ms": 65.17819996224716,
672
- "build_ms": 563945.4993000254
672
+ "recall_at_k": 0.984,
673
+ "avg_latency_ms": 116.80030399700627,
674
+ "p95_latency_ms": 153.84380001341924,
675
+ "p99_latency_ms": 209.27749999100342,
676
+ "build_ms": 450674.6019999846
673
677
  }
674
678
  },
675
- "target": "Tune service settings until recall@10 >= 0.95 with p95 latency below 100 ms at 1M vectors.",
676
- "next_step": "Run Qdrant with tuned HNSW/search params, then add FAISS IVF/HNSW and pgvector 1M profiles on a larger disk."
679
+ "target": "Keep recall@10 >= 0.95 and push p99 latency below 100 ms at 1M vectors.",
680
+ "next_step": "Tune Qdrant indexing/search params further, then add FAISS IVF/HNSW and pgvector 1M profiles on a larger disk."
681
+ },
682
+ {
683
+ "id": "production_load_qdrant_1m_ef_sweep",
684
+ "name": "Qdrant 1M HNSW ef sweep",
685
+ "category": "production-scale",
686
+ "status": "implemented",
687
+ "source": "benchmarks/production_load_qdrant_1m_ef_sweep_results.json",
688
+ "dataset": "1000000 generated normalized 128-d vectors; one Qdrant service collection queried with multiple hnsw_ef settings.",
689
+ "competitors": [
690
+ "Qdrant service"
691
+ ],
692
+ "metrics": [
693
+ "recall@10",
694
+ "avg_latency_ms",
695
+ "p95_latency_ms",
696
+ "p99_latency_ms"
697
+ ],
698
+ "current": {
699
+ "hnsw_ef=512": {
700
+ "recall_at_k": 0.7533333333333333,
701
+ "avg_latency_ms": 47.16066332766786,
702
+ "p95_latency_ms": 68.49600002169609,
703
+ "p99_latency_ms": 68.51829995866865,
704
+ "max_latency_ms": 68.51829995866865
705
+ },
706
+ "hnsw_ef=768": {
707
+ "recall_at_k": 0.85,
708
+ "avg_latency_ms": 44.041876665626965,
709
+ "p95_latency_ms": 69.07949998276308,
710
+ "p99_latency_ms": 69.75289998808876,
711
+ "max_latency_ms": 69.75289998808876
712
+ },
713
+ "hnsw_ef=1024": {
714
+ "recall_at_k": 0.8833333333333333,
715
+ "avg_latency_ms": 62.91633000170501,
716
+ "p95_latency_ms": 81.1338999774307,
717
+ "p99_latency_ms": 85.5405000038445,
718
+ "max_latency_ms": 85.5405000038445
719
+ },
720
+ "hnsw_ef=1536": {
721
+ "recall_at_k": 0.9366666666666666,
722
+ "avg_latency_ms": 65.5569299919686,
723
+ "p95_latency_ms": 111.21890001231804,
724
+ "p99_latency_ms": 119.66809997102246,
725
+ "max_latency_ms": 119.66809997102246
726
+ },
727
+ "hnsw_ef=2048": {
728
+ "recall_at_k": 0.9766666666666667,
729
+ "avg_latency_ms": 64.75662334123626,
730
+ "p95_latency_ms": 91.18230000603944,
731
+ "p99_latency_ms": 103.77270000753924,
732
+ "max_latency_ms": 103.77270000753924
733
+ }
734
+ },
735
+ "target": "Find a setting that keeps recall@10 >= 0.95 while keeping p99 latency below 100 ms.",
736
+ "next_step": "Repeat with 100+ queries and collection-level HNSW build parameters before claiming a stable production SLO."
677
737
  },
678
738
  {
679
739
  "id": "scale_readiness",
@@ -701,20 +761,23 @@
701
761
  "nodes": 4,
702
762
  "replication_factor": 2,
703
763
  "node_loss_min_availability": 1.0,
704
- "placement_ms": 109.29749999195337
764
+ "zone_loss_min_availability": 1.0,
765
+ "read_quorum": 1,
766
+ "write_quorum": 2,
767
+ "placement_ms": 115.80999998841435
705
768
  },
706
769
  "WaveMind hot cache": {
707
770
  "queries": 2000,
708
771
  "capacity": 512,
709
772
  "hit_rate": 0.92,
710
773
  "evictions": 0,
711
- "p99_lookup_ms": 0.009100011084228754
774
+ "p99_lookup_ms": 0.003500026650726795
712
775
  },
713
776
  "WaveMind structured payloads": {
714
777
  "queries": 4,
715
778
  "precision_at_1": 1.0,
716
- "avg_latency_ms": 0.7908999978099018,
717
- "p99_latency_ms": 1.2743999832309783
779
+ "avg_latency_ms": 0.8370749856112525,
780
+ "p99_latency_ms": 1.0724000167101622
718
781
  }
719
782
  },
720
783
  "target": "Prove the production foundation before heavier 100k, 1M, and 10M vector load tests: deterministic placement, survivable replicas, hot-cache behavior, and structured payload recall.",