vectordb-bench 0.0.28__tar.gz → 0.0.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/PKG-INFO +56 -5
  2. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/README.md +55 -4
  3. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/__init__.py +3 -1
  4. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/__init__.py +16 -0
  5. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +180 -15
  6. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
  7. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aws_opensearch/config.py +37 -14
  8. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/clickhouse/cli.py +1 -0
  9. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/clickhouse/clickhouse.py +3 -3
  10. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/clickhouse/config.py +2 -2
  11. vectordb_bench-0.0.30/vectordb_bench/backend/clients/lancedb/cli.py +146 -0
  12. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/lancedb/config.py +14 -1
  13. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/lancedb/lancedb.py +21 -3
  14. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
  15. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/milvus/cli.py +30 -9
  16. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/milvus/config.py +2 -0
  17. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/milvus/milvus.py +7 -1
  18. vectordb_bench-0.0.30/vectordb_bench/backend/clients/qdrant_cloud/cli.py +43 -0
  19. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/qdrant_cloud/config.py +4 -4
  20. vectordb_bench-0.0.30/vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
  21. vectordb_bench-0.0.30/vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
  22. vectordb_bench-0.0.30/vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
  23. vectordb_bench-0.0.30/vectordb_bench/backend/clients/weaviate_cloud/cli.py +66 -0
  24. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
  25. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
  26. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/mp_runner.py +16 -5
  27. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/task_runner.py +1 -0
  28. vectordb_bench-0.0.30/vectordb_bench/cli/batch_cli.py +121 -0
  29. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/cli/cli.py +13 -2
  30. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/cli/vectordbbench.py +6 -0
  31. vectordb_bench-0.0.30/vectordb_bench/config-files/batch_sample_config.yml +17 -0
  32. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +10 -4
  33. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/config/dbCaseConfigs.py +113 -1
  34. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/models.py +13 -0
  35. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/PKG-INFO +56 -5
  36. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/SOURCES.txt +6 -0
  37. vectordb_bench-0.0.28/vectordb_bench/backend/clients/lancedb/cli.py +0 -92
  38. vectordb_bench-0.0.28/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -40
  39. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.devcontainer/Dockerfile +0 -0
  40. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.devcontainer/devcontainer.json +0 -0
  41. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.env.example +0 -0
  42. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.github/workflows/publish_package_on_release.yml +0 -0
  43. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.github/workflows/pull_request.yml +0 -0
  44. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/.gitignore +0 -0
  45. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/Dockerfile +0 -0
  46. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/LICENSE +0 -0
  47. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/Makefile +0 -0
  48. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/OWNERS +0 -0
  49. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/fig/custom_case_run_test.png +0 -0
  50. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/fig/custom_dataset.png +0 -0
  51. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/install/requirements_py3.11.txt +0 -0
  52. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/install.py +0 -0
  53. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/pyproject.toml +0 -0
  54. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/setup.cfg +0 -0
  55. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/conftest.py +0 -0
  56. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/pytest.ini +0 -0
  57. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_bench_runner.py +0 -0
  58. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_chroma.py +0 -0
  59. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_data_source.py +0 -0
  60. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_dataset.py +0 -0
  61. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_elasticsearch_cloud.py +0 -0
  62. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_models.py +0 -0
  63. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_rate_runner.py +0 -0
  64. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_redis.py +0 -0
  65. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/test_utils.py +0 -0
  66. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/tests/ut_cases.py +0 -0
  67. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/__main__.py +0 -0
  68. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/__init__.py +0 -0
  69. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/assembler.py +0 -0
  70. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/cases.py +0 -0
  71. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  72. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  73. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
  74. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  75. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  76. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  77. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  78. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/api.py +0 -0
  79. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  80. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  81. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  82. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  83. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  84. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/mariadb/cli.py +0 -0
  85. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/mariadb/config.py +0 -0
  86. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/mariadb/mariadb.py +0 -0
  87. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  88. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  89. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/mongodb/config.py +0 -0
  90. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/mongodb/mongodb.py +0 -0
  91. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  92. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  93. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
  94. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  95. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  96. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  97. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  98. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  99. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  100. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  101. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  102. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  103. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  104. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  105. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  106. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  107. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/redis/config.py +0 -0
  108. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  109. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/test/cli.py +0 -0
  110. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/test/config.py +0 -0
  111. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/test/test.py +0 -0
  112. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/tidb/cli.py +0 -0
  113. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/tidb/config.py +0 -0
  114. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/tidb/tidb.py +0 -0
  115. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/vespa/cli.py +0 -0
  116. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/vespa/config.py +0 -0
  117. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/vespa/util.py +0 -0
  118. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/vespa/vespa.py +0 -0
  119. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  120. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  121. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  122. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/data_source.py +0 -0
  123. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/dataset.py +0 -0
  124. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/result_collector.py +0 -0
  125. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/__init__.py +0 -0
  126. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/rate_runner.py +0 -0
  127. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/read_write_runner.py +0 -0
  128. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/serial_runner.py +0 -0
  129. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/runner/util.py +0 -0
  130. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/backend/utils.py +0 -0
  131. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/base.py +0 -0
  132. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/cli/__init__.py +0 -0
  133. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/config-files/sample_config.yml +0 -0
  134. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/custom/custom_case.json +0 -0
  135. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  136. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  137. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  138. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  139. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  140. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  141. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  142. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  143. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  144. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  145. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  146. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -0
  147. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  148. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  149. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  150. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  151. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
  152. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  153. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  154. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  155. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  156. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/run_test/submitTask.py +0 -0
  157. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/components/tables/data.py +0 -0
  158. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  159. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/config/styles.py +0 -0
  160. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  161. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/pages/custom.py +0 -0
  162. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  163. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/pages/run_test.py +0 -0
  164. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/pages/tables.py +0 -0
  165. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/utils.py +0 -0
  166. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
  167. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/interface.py +0 -0
  168. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/log_util.py +0 -0
  169. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/metric.py +0 -0
  170. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  171. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  172. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  173. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  174. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  175. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  176. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  177. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  178. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  179. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  180. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  181. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  182. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  183. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  184. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  185. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/dbPrices.json +0 -0
  186. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/getLeaderboardData.py +0 -0
  187. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench/results/leaderboard.json +0 -0
  188. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  189. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/entry_points.txt +0 -0
  190. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/requires.txt +0 -0
  191. {vectordb_bench-0.0.28 → vectordb_bench-0.0.30}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectordb-bench
3
- Version: 0.0.28
3
+ Version: 0.0.30
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -92,13 +92,13 @@ Provides-Extra: lancedb
92
92
  Requires-Dist: lancedb; extra == "lancedb"
93
93
  Dynamic: license-file
94
94
 
95
- # VectorDBBench: A Benchmark Tool for VectorDB
95
+ # VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
96
96
 
97
97
  [![version](https://img.shields.io/pypi/v/vectordb-bench.svg?color=blue)](https://pypi.org/project/vectordb-bench/)
98
98
  [![Downloads](https://pepy.tech/badge/vectordb-bench)](https://pepy.tech/project/vectordb-bench)
99
99
 
100
100
  ## What is VectorDBBench
101
- VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
101
+ VectorDBBench(VDBBench) is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
102
102
 
103
103
  Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
104
104
  To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
@@ -208,6 +208,10 @@ Options:
208
208
  --num-concurrency TEXT Comma-separated list of concurrency values
209
209
  to test during concurrent search [default:
210
210
  1,10,20]
211
+ --concurrency-timeout INTEGER Timeout (in seconds) to wait for a
212
+ concurrency slot before failing. Set to a
213
+ negative value to wait indefinitely.
214
+ [default: 3600]
211
215
  --user-name TEXT Db username [required]
212
216
  --password TEXT Db password [required]
213
217
  --host TEXT Db host [required]
@@ -291,10 +295,14 @@ Options:
291
295
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
292
296
  --flush-threshold-size TEXT Size threshold for flushing the transaction
293
297
  log
298
+ --engine TEXT Type of engine to use. Valid values: [faiss, lucene]
294
299
  # Memory Management
295
300
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
296
-
297
- --help Show this message and exit.```
301
+
302
+ # Quantization Type
303
+ --quantization-type TEXT Type of quantization to use. Valid values: [fp32, fp16]
304
+ --help Show this message and exit.
305
+ ```
298
306
 
299
307
  #### Using a configuration file.
300
308
 
@@ -334,6 +342,49 @@ milvushnsw:
334
342
  > - Options passed on the command line will override the configuration file*
335
343
  > - Parameter names use an _ not -
336
344
 
345
+ #### Using a batch configuration file.
346
+
347
+ The vectordbbench command can read a batch configuration file to run all the test cases in the YAML-formatted configuration file.
348
+
349
+ By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
350
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
351
+
352
+ The required format is:
353
+ ```yaml
354
+ commandname:
355
+ - parameter_name: parameter_value
356
+ another_parameter_name: parameter_value
357
+ ```
358
+ Example:
359
+ ```yaml
360
+ pgvectorhnsw:
361
+ - db_label: pgConfigTest
362
+ user_name: vectordbbench
363
+ password: vectordbbench
364
+ db_name: vectordbbench
365
+ host: localhost
366
+ m: 16
367
+ ef_construction: 128
368
+ ef_search: 128
369
+ milvushnsw:
370
+ - skip_search_serial: True
371
+ case_type: Performance1536D50K
372
+ uri: http://localhost:19530
373
+ m: 16
374
+ ef_construction: 128
375
+ ef_search: 128
376
+ drop_old: False
377
+ load: False
378
+ ```
379
+ > Notes:
380
+ > - Options can only be passed through configuration files
381
+ > - Parameter names use an _ not -
382
+
383
+ How to use?
384
+ ```shell
385
+ vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
386
+ ```
387
+
337
388
  ## Leaderboard
338
389
  ### Introduction
339
390
  To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
@@ -1,10 +1,10 @@
1
- # VectorDBBench: A Benchmark Tool for VectorDB
1
+ # VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
2
2
 
3
3
  [![version](https://img.shields.io/pypi/v/vectordb-bench.svg?color=blue)](https://pypi.org/project/vectordb-bench/)
4
4
  [![Downloads](https://pepy.tech/badge/vectordb-bench)](https://pepy.tech/project/vectordb-bench)
5
5
 
6
6
  ## What is VectorDBBench
7
- VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
7
+ VectorDBBench(VDBBench) is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
8
8
 
9
9
  Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
10
10
  To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
@@ -114,6 +114,10 @@ Options:
114
114
  --num-concurrency TEXT Comma-separated list of concurrency values
115
115
  to test during concurrent search [default:
116
116
  1,10,20]
117
+ --concurrency-timeout INTEGER Timeout (in seconds) to wait for a
118
+ concurrency slot before failing. Set to a
119
+ negative value to wait indefinitely.
120
+ [default: 3600]
117
121
  --user-name TEXT Db username [required]
118
122
  --password TEXT Db password [required]
119
123
  --host TEXT Db host [required]
@@ -197,10 +201,14 @@ Options:
197
201
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
198
202
  --flush-threshold-size TEXT Size threshold for flushing the transaction
199
203
  log
204
+ --engine TEXT Type of engine to use. Valid values: [faiss, lucene]
200
205
  # Memory Management
201
206
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
202
-
203
- --help Show this message and exit.```
207
+
208
+ # Quantization Type
209
+ --quantization-type TEXT Type of quantization to use. Valid values: [fp32, fp16]
210
+ --help Show this message and exit.
211
+ ```
204
212
 
205
213
  #### Using a configuration file.
206
214
 
@@ -240,6 +248,49 @@ milvushnsw:
240
248
  > - Options passed on the command line will override the configuration file*
241
249
  > - Parameter names use an _ not -
242
250
 
251
+ #### Using a batch configuration file.
252
+
253
+ The vectordbbench command can read a batch configuration file to run all the test cases in the YAML-formatted configuration file.
254
+
255
+ By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
256
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
257
+
258
+ The required format is:
259
+ ```yaml
260
+ commandname:
261
+ - parameter_name: parameter_value
262
+ another_parameter_name: parameter_value
263
+ ```
264
+ Example:
265
+ ```yaml
266
+ pgvectorhnsw:
267
+ - db_label: pgConfigTest
268
+ user_name: vectordbbench
269
+ password: vectordbbench
270
+ db_name: vectordbbench
271
+ host: localhost
272
+ m: 16
273
+ ef_construction: 128
274
+ ef_search: 128
275
+ milvushnsw:
276
+ - skip_search_serial: True
277
+ case_type: Performance1536D50K
278
+ uri: http://localhost:19530
279
+ m: 16
280
+ ef_construction: 128
281
+ ef_search: 128
282
+ drop_old: False
283
+ load: False
284
+ ```
285
+ > Notes:
286
+ > - Options can only be passed through configuration files
287
+ > - Parameter names use an _ not -
288
+
289
+ How to use?
290
+ ```shell
291
+ vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
292
+ ```
293
+
243
294
  ## Leaderboard
244
295
  ### Introduction
245
296
  To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
@@ -6,7 +6,7 @@ import environs
6
6
  from . import log_util
7
7
 
8
8
  env = environs.Env()
9
- env.read_env(".env", False)
9
+ env.read_env(path=".env", recurse=False)
10
10
 
11
11
 
12
12
  class config:
@@ -52,6 +52,8 @@ class config:
52
52
 
53
53
  CONCURRENCY_DURATION = 30
54
54
 
55
+ CONCURRENCY_TIMEOUT = 3600
56
+
55
57
  RESULTS_LOCAL_DIR = env.path(
56
58
  "RESULTS_LOCAL_DIR",
57
59
  pathlib.Path(__file__).parent.joinpath("results"),
@@ -27,6 +27,7 @@ class DB(Enum):
27
27
  Pinecone = "Pinecone"
28
28
  ElasticCloud = "ElasticCloud"
29
29
  QdrantCloud = "QdrantCloud"
30
+ QdrantLocal = "QdrantLocal"
30
31
  WeaviateCloud = "WeaviateCloud"
31
32
  PgVector = "PgVector"
32
33
  PgVectoRS = "PgVectoRS"
@@ -75,6 +76,11 @@ class DB(Enum):
75
76
 
76
77
  return QdrantCloud
77
78
 
79
+ if self == DB.QdrantLocal:
80
+ from .qdrant_local.qdrant_local import QdrantLocal
81
+
82
+ return QdrantLocal
83
+
78
84
  if self == DB.WeaviateCloud:
79
85
  from .weaviate_cloud.weaviate_cloud import WeaviateCloud
80
86
 
@@ -201,6 +207,11 @@ class DB(Enum):
201
207
 
202
208
  return QdrantConfig
203
209
 
210
+ if self == DB.QdrantLocal:
211
+ from .qdrant_local.config import QdrantLocalConfig
212
+
213
+ return QdrantLocalConfig
214
+
204
215
  if self == DB.WeaviateCloud:
205
216
  from .weaviate_cloud.config import WeaviateConfig
206
217
 
@@ -323,6 +334,11 @@ class DB(Enum):
323
334
 
324
335
  return QdrantIndexConfig
325
336
 
337
+ if self == DB.QdrantLocal:
338
+ from .qdrant_local.config import QdrantLocalIndexConfig
339
+
340
+ return QdrantLocalIndexConfig
341
+
326
342
  if self == DB.WeaviateCloud:
327
343
  from .weaviate_cloud.config import WeaviateIndexConfig
328
344
 
@@ -36,6 +36,7 @@ class AWSOpenSearch(VectorDB):
36
36
  self.vector_col_name = vector_col_name
37
37
 
38
38
  log.info(f"AWS_OpenSearch client config: {self.db_config}")
39
+ log.info(f"AWS_OpenSearch db case config : {self.case_config}")
39
40
  client = OpenSearch(**self.db_config)
40
41
  if drop_old:
41
42
  log.info(f"AWS_OpenSearch client drop old index: {self.index_name}")
@@ -43,6 +44,14 @@ class AWSOpenSearch(VectorDB):
43
44
  if is_existed:
44
45
  client.indices.delete(index=self.index_name)
45
46
  self._create_index(client)
47
+ else:
48
+ is_existed = client.indices.exists(index=self.index_name)
49
+ if not is_existed:
50
+ self._create_index(client)
51
+ log.info(f"AWS_OpenSearch client create index: {self.index_name}")
52
+
53
+ self._update_ef_search_before_search(client)
54
+ self._load_graphs_to_memory(client)
46
55
 
47
56
  @classmethod
48
57
  def config_cls(cls) -> AWSOpenSearchConfig:
@@ -52,7 +61,17 @@ class AWSOpenSearch(VectorDB):
52
61
  def case_config_cls(cls, index_type: IndexType | None = None) -> AWSOpenSearchIndexConfig:
53
62
  return AWSOpenSearchIndexConfig
54
63
 
55
- def _create_index(self, client: OpenSearch):
64
+ def _create_index(self, client: OpenSearch) -> None:
65
+ ef_search_value = (
66
+ self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
67
+ )
68
+ log.info(f"Creating index with ef_search: {ef_search_value}")
69
+ log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}")
70
+
71
+ log.info(f"Creating index with engine: {self.case_config.engine}")
72
+ log.info(f"Creating index with metric type: {self.case_config.metric_type_name}")
73
+ log.info(f"All case_config parameters: {self.case_config.__dict__}")
74
+
56
75
  cluster_settings_body = {
57
76
  "persistent": {
58
77
  "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
@@ -64,18 +83,15 @@ class AWSOpenSearch(VectorDB):
64
83
  "index": {
65
84
  "knn": True,
66
85
  "number_of_shards": self.case_config.number_of_shards,
67
- "number_of_replicas": 0,
86
+ "number_of_replicas": self.case_config.number_of_replicas,
68
87
  "translog.flush_threshold_size": self.case_config.flush_threshold_size,
69
- # Setting trans log threshold to 5GB
70
- **(
71
- {"knn.algo_param.ef_search": self.case_config.ef_search}
72
- if self.case_config.engine == AWSOS_Engine.nmslib
73
- else {}
74
- ),
88
+ "knn.advanced.approximate_threshold": "-1",
75
89
  },
76
90
  "refresh_interval": self.case_config.refresh_interval,
77
91
  }
92
+ settings["index"]["knn.algo_param.ef_search"] = ef_search_value
78
93
  mappings = {
94
+ "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
79
95
  "properties": {
80
96
  **{categoryCol: {"type": "keyword"} for categoryCol in self.category_col_names},
81
97
  self.vector_col_name: {
@@ -86,6 +102,8 @@ class AWSOpenSearch(VectorDB):
86
102
  },
87
103
  }
88
104
  try:
105
+ log.info(f"Creating index with settings: {settings}")
106
+ log.info(f"Creating index with mappings: {mappings}")
89
107
  client.indices.create(
90
108
  index=self.index_name,
91
109
  body={"settings": settings, "mappings": mappings},
@@ -112,6 +130,18 @@ class AWSOpenSearch(VectorDB):
112
130
  """Insert the embeddings to the opensearch."""
113
131
  assert self.client is not None, "should self.init() first"
114
132
 
133
+ num_clients = self.case_config.number_of_indexing_clients or 1
134
+ log.info(f"Number of indexing clients from case_config: {num_clients}")
135
+
136
+ if num_clients <= 1:
137
+ log.info("Using single client for data insertion")
138
+ return self._insert_with_single_client(embeddings, metadata)
139
+ log.info(f"Using {num_clients} parallel clients for data insertion")
140
+ return self._insert_with_multiple_clients(embeddings, metadata, num_clients)
141
+
142
+ def _insert_with_single_client(
143
+ self, embeddings: Iterable[list[float]], metadata: list[int]
144
+ ) -> tuple[int, Exception]:
115
145
  insert_data = []
116
146
  for i in range(len(embeddings)):
117
147
  insert_data.append(
@@ -129,7 +159,108 @@ class AWSOpenSearch(VectorDB):
129
159
  except Exception as e:
130
160
  log.warning(f"Failed to insert data: {self.index_name} error: {e!s}")
131
161
  time.sleep(10)
132
- return self.insert_embeddings(embeddings, metadata)
162
+ return self._insert_with_single_client(embeddings, metadata)
163
+
164
+ def _insert_with_multiple_clients(
165
+ self, embeddings: Iterable[list[float]], metadata: list[int], num_clients: int
166
+ ) -> tuple[int, Exception]:
167
+ import concurrent.futures
168
+ from concurrent.futures import ThreadPoolExecutor
169
+
170
+ embeddings_list = list(embeddings)
171
+ chunk_size = max(1, len(embeddings_list) // num_clients)
172
+ chunks = []
173
+
174
+ for i in range(0, len(embeddings_list), chunk_size):
175
+ end = min(i + chunk_size, len(embeddings_list))
176
+ chunks.append((embeddings_list[i:end], metadata[i:end]))
177
+
178
+ clients = []
179
+ for _ in range(min(num_clients, len(chunks))):
180
+ client = OpenSearch(**self.db_config)
181
+ clients.append(client)
182
+
183
+ log.info(f"AWS_OpenSearch using {len(clients)} parallel clients for data insertion")
184
+
185
+ def insert_chunk(client_idx: int, chunk_idx: int):
186
+ chunk_embeddings, chunk_metadata = chunks[chunk_idx]
187
+ client = clients[client_idx]
188
+
189
+ insert_data = []
190
+ for i in range(len(chunk_embeddings)):
191
+ insert_data.append(
192
+ {"index": {"_index": self.index_name, self.id_col_name: chunk_metadata[i]}},
193
+ )
194
+ insert_data.append({self.vector_col_name: chunk_embeddings[i]})
195
+
196
+ try:
197
+ resp = client.bulk(insert_data)
198
+ log.info(f"Client {client_idx} added {len(resp['items'])} documents")
199
+ return len(chunk_embeddings), None
200
+ except Exception as e:
201
+ log.warning(f"Client {client_idx} failed to insert data: {e!s}")
202
+ return 0, e
203
+
204
+ results = []
205
+ with ThreadPoolExecutor(max_workers=len(clients)) as executor:
206
+ futures = []
207
+
208
+ for chunk_idx in range(len(chunks)):
209
+ client_idx = chunk_idx % len(clients)
210
+ futures.append(executor.submit(insert_chunk, client_idx, chunk_idx))
211
+
212
+ for future in concurrent.futures.as_completed(futures):
213
+ count, error = future.result()
214
+ results.append((count, error))
215
+
216
+ from contextlib import suppress
217
+
218
+ for client in clients:
219
+ with suppress(Exception):
220
+ client.close()
221
+
222
+ total_count = sum(count for count, _ in results)
223
+ errors = [error for _, error in results if error is not None]
224
+
225
+ if errors:
226
+ log.warning("Some clients failed to insert data, retrying with single client")
227
+ time.sleep(10)
228
+ return self._insert_with_single_client(embeddings, metadata)
229
+
230
+ resp = self.client.indices.stats(self.index_name)
231
+ log.info(
232
+ f"""Total document count in index after parallel insertion:
233
+ {resp['_all']['primaries']['indexing']['index_total']}""",
234
+ )
235
+
236
+ return (total_count, None)
237
+
238
def _update_ef_search_before_search(self, client: OpenSearch):
    """Bring the index-level ``knn.algo_param.ef_search`` setting in line with the case config.

    Reads the index's current settings through ``client`` and calls
    ``put_settings`` only when the stored value differs from the configured
    one. ``case_config.ef_search`` is used when it is not ``None``,
    otherwise ``case_config.efSearch``. Any failure is logged as a warning
    and swallowed.
    """
    cfg = self.case_config
    ef_search_value = cfg.efSearch if cfg.ef_search is None else cfg.ef_search

    try:
        # Walk settings -> index -> knn.algo_param, tolerating missing levels.
        node = client.indices.get_settings(index=self.index_name).get(self.index_name, {})
        for key in ("settings", "index", "knn.algo_param"):
            node = node.get(key, {})
        current_ef_search = node.get("ef_search")

        # The stored value is compared against the string form of the
        # configured one — presumably the settings API returns strings; confirm.
        needs_update = current_ef_search != str(ef_search_value)
        if needs_update:
            log.info(f"Updating ef_search before search from {current_ef_search} to {ef_search_value}")
            client.indices.put_settings(
                index=self.index_name,
                body={"index": {"knn.algo_param.ef_search": ef_search_value}},
            )
            log.info(f"Successfully updated ef_search to {ef_search_value} before search")

        log.info(f"Current engine: {cfg.engine}")
        log.info(f"Current metric_type: {cfg.metric_type_name}")

    except Exception as e:
        log.warning(f"Failed to update ef_search parameter before search: {e}")
133
264
 
134
265
  def search_embedding(
135
266
  self,
@@ -151,9 +282,18 @@ class AWSOpenSearch(VectorDB):
151
282
 
152
283
  body = {
153
284
  "size": k,
154
- "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
285
+ "query": {
286
+ "knn": {
287
+ self.vector_col_name: {
288
+ "vector": query,
289
+ "k": k,
290
+ "method_parameters": {"ef_search": self.case_config.efSearch},
291
+ }
292
+ }
293
+ },
155
294
  **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {}),
156
295
  }
296
+
157
297
  try:
158
298
  resp = self.client.search(
159
299
  index=self.index_name,
@@ -162,6 +302,7 @@ class AWSOpenSearch(VectorDB):
162
302
  _source=False,
163
303
  docvalue_fields=[self.id_col_name],
164
304
  stored_fields="_none_",
305
+ preference="_only_local" if self.case_config.number_of_shards == 1 else None,
165
306
  )
166
307
  log.debug(f"Search took: {resp['took']}")
167
308
  log.debug(f"Search shards: {resp['_shards']}")
@@ -173,6 +314,7 @@ class AWSOpenSearch(VectorDB):
173
314
 
174
315
  def optimize(self, data_size: int | None = None):
175
316
  """optimize will be called between insertion and search in performance cases."""
317
+ self._update_ef_search()
176
318
  # Call refresh first to ensure that all segments are created
177
319
  self._refresh_index()
178
320
  if self.case_config.force_merge_enabled:
@@ -182,7 +324,22 @@ class AWSOpenSearch(VectorDB):
182
324
  # Call refresh again to ensure that the index is ready after force merge.
183
325
  self._refresh_index()
184
326
  # ensure that all graphs are loaded in memory and ready for search
185
- self._load_graphs_to_memory()
327
+ self._load_graphs_to_memory(self.client)
328
+
329
def _update_ef_search(self):
    """Write the configured ef_search value to the index settings via ``self.client``.

    ``case_config.ef_search`` is used when it is not ``None``, otherwise
    ``case_config.efSearch``. A failure is logged as a warning and swallowed.
    """
    cfg = self.case_config
    ef_search_value = cfg.efSearch if cfg.ef_search is None else cfg.ef_search
    log.info(f"Updating ef_search parameter to: {ef_search_value}")

    try:
        self.client.indices.put_settings(
            index=self.index_name,
            body={"index": {"knn.algo_param.ef_search": ef_search_value}},
        )
        # The follow-up logs stay inside the try so a failing attribute read
        # is reported as a warning rather than raised.
        log.info(f"Successfully updated ef_search to {ef_search_value}")
        log.info(f"Current engine: {cfg.engine}")
        log.info(f"Current metric_type: {cfg.metric_type}")
    except Exception as e:
        log.warning(f"Failed to update ef_search parameter: {e}")
186
343
 
187
344
  def _update_replicas(self):
188
345
  index_settings = self.client.indices.get_settings(index=self.index_name)
@@ -200,7 +357,7 @@ class AWSOpenSearch(VectorDB):
200
357
  while True:
201
358
  res = self.client.cat.indices(index=self.index_name, h="health", format="json")
202
359
  health = res[0]["health"]
203
- if health != "green":
360
+ if health == "green":
204
361
  break
205
362
  log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
206
363
  time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
@@ -228,8 +385,16 @@ class AWSOpenSearch(VectorDB):
228
385
  "persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
229
386
  }
230
387
  self.client.cluster.put_settings(cluster_settings_body)
388
+
389
+ log.info("Updating the graph threshold to ensure that during merge we can do graph creation.")
390
+ output = self.client.indices.put_settings(
391
+ index=self.index_name, body={"index.knn.advanced.approximate_threshold": "0"}
392
+ )
393
+ log.info(f"response of updating setting is: {output}")
394
+
231
395
  log.debug(f"Starting force merge for index {self.index_name}")
232
- force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
396
+ segments = self.case_config.number_of_segments
397
+ force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments={segments}&wait_for_completion=false"
233
398
  force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
234
399
  while True:
235
400
  time.sleep(WAITING_FOR_FORCE_MERGE_SEC)
@@ -238,8 +403,8 @@ class AWSOpenSearch(VectorDB):
238
403
  break
239
404
  log.debug(f"Completed force merge for index {self.index_name}")
240
405
 
241
- def _load_graphs_to_memory(self):
406
+ def _load_graphs_to_memory(self, client: OpenSearch):
242
407
  if self.case_config.engine != AWSOS_Engine.lucene:
243
408
  log.info("Calling warmup API to load graphs into memory")
244
409
  warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
245
- self.client.transport.perform_request("GET", warmup_endpoint)
410
+ client.transport.perform_request("GET", warmup_endpoint)