vectordb-bench 0.0.20__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.gitignore +3 -1
  2. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/PKG-INFO +55 -2
  3. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/README.md +51 -0
  4. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/install/requirements_py3.11.txt +1 -1
  5. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/install.py +2 -1
  6. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/pyproject.toml +3 -2
  7. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/assembler.py +2 -2
  8. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/__init__.py +28 -2
  9. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +1 -7
  10. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/alloydb.py +1 -4
  11. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/api.py +8 -15
  12. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +54 -8
  13. vectordb_bench-0.0.22/vectordb_bench/backend/clients/aws_opensearch/cli.py +125 -0
  14. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
  15. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/chroma/chroma.py +1 -4
  16. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -4
  17. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/cli.py +2 -2
  18. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/memorydb.py +2 -5
  19. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/milvus.py +1 -20
  20. vectordb_bench-0.0.22/vectordb_bench/backend/clients/mongodb/config.py +53 -0
  21. vectordb_bench-0.0.22/vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
  22. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +1 -4
  23. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +3 -11
  24. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/pgvector.py +2 -7
  25. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +2 -7
  26. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pinecone/pinecone.py +1 -4
  27. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +3 -6
  28. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/redis.py +1 -4
  29. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/cli.py +1 -1
  30. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/test.py +1 -4
  31. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -4
  32. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/data_source.py +4 -12
  33. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/mp_runner.py +16 -34
  34. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/rate_runner.py +4 -4
  35. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/read_write_runner.py +11 -15
  36. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/serial_runner.py +20 -28
  37. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/task_runner.py +6 -26
  38. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
  39. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
  40. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/dbCaseConfigs.py +32 -0
  41. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/interface.py +10 -19
  42. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/log_util.py +15 -2
  43. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/models.py +4 -0
  44. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/PKG-INFO +55 -2
  45. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/SOURCES.txt +2 -0
  46. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/requires.txt +4 -1
  47. vectordb_bench-0.0.20/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -41
  48. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.devcontainer/Dockerfile +0 -0
  49. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.devcontainer/devcontainer.json +0 -0
  50. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.env.example +0 -0
  51. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.github/workflows/publish_package_on_release.yml +0 -0
  52. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.github/workflows/pull_request.yml +0 -0
  53. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/Dockerfile +0 -0
  54. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/LICENSE +0 -0
  55. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/Makefile +0 -0
  56. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/OWNERS +0 -0
  57. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/fig/custom_case_run_test.png +0 -0
  58. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/fig/custom_dataset.png +0 -0
  59. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/setup.cfg +0 -0
  60. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/conftest.py +0 -0
  61. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/pytest.ini +0 -0
  62. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_bench_runner.py +0 -0
  63. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_chroma.py +0 -0
  64. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_data_source.py +0 -0
  65. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_dataset.py +0 -0
  66. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_elasticsearch_cloud.py +0 -0
  67. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_models.py +0 -0
  68. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_rate_runner.py +0 -0
  69. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_redis.py +0 -0
  70. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/test_utils.py +0 -0
  71. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/tests/ut_cases.py +0 -0
  72. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/__init__.py +0 -0
  73. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/__main__.py +0 -0
  74. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/__init__.py +0 -0
  75. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/cases.py +0 -0
  76. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  77. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  78. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  79. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  80. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  81. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  82. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  83. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  84. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  85. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/cli.py +0 -0
  86. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/config.py +0 -0
  87. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  88. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  89. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  90. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  91. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  92. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  93. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  94. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  95. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  96. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  97. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  98. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/config.py +0 -0
  99. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/config.py +0 -0
  100. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  101. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  102. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  103. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  104. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  105. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/dataset.py +0 -0
  106. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/result_collector.py +0 -0
  107. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/__init__.py +0 -0
  108. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/util.py +0 -0
  109. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/utils.py +0 -0
  110. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/base.py +0 -0
  111. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/cli/__init__.py +0 -0
  112. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/cli/cli.py +0 -0
  113. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/cli/vectordbbench.py +0 -0
  114. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/config-files/sample_config.yml +0 -0
  115. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/custom/custom_case.json +0 -0
  116. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  117. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  118. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  119. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  120. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  121. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  122. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  123. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  124. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  125. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  126. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  127. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  128. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  129. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  130. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  131. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
  132. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  133. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  134. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  135. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  136. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  137. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/tables/data.py +0 -0
  138. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  139. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/styles.py +0 -0
  140. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  141. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/custom.py +0 -0
  142. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  143. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/run_test.py +0 -0
  144. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/tables.py +0 -0
  145. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/utils.py +0 -0
  146. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
  147. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/metric.py +0 -0
  148. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  149. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  150. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  151. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  152. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  153. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  154. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  155. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  156. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  157. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  158. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  159. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  160. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  161. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  162. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  163. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/dbPrices.json +0 -0
  164. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/getLeaderboardData.py +0 -0
  165. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/results/leaderboard.json +0 -0
  166. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  167. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/entry_points.txt +0 -0
  168. {vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/top_level.txt +0 -0
{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/.gitignore
@@ -8,5 +8,7 @@ __MACOSX
  .DS_Store
  build/
  venv/
+ .venv/
  .idea/
- results/
+ results/
+ logs/

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: vectordb-bench
- Version: 0.0.20
+ Version: 0.0.22
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
  Requires-Dist: psutil
  Requires-Dist: polars
  Requires-Dist: plotly
- Requires-Dist: environs
+ Requires-Dist: environs<14.1.0
  Requires-Dist: pydantic<v2
  Requires-Dist: scikit-learn
  Requires-Dist: pymilvus
@@ -73,6 +73,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
  Provides-Extra: aliyun-opensearch
  Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
  Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
+ Provides-Extra: mongodb
+ Requires-Dist: pymongo; extra == "mongodb"

  # VectorDBBench: A Benchmark Tool for VectorDB

@@ -89,6 +91,8 @@ Closely mimicking real-world production environments, we've set up diverse testi

  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.

+ VectorDBBench is sponsered by Zilliz,the leading opensource vectorDB company behind Milvus. Choose smarter with VectorDBBench- start your free test on [zilliz cloud](https://zilliz.com/) today!
+
  **Leaderboard:** https://zilliz.com/benchmark
  ## Quick Start
  ### Prerequirement
@@ -128,6 +132,7 @@ All the database client supported
  | chromadb | `pip install vectordb-bench[chromadb]` |
  | awsopensearch | `pip install vectordb-bench[opensearch]` |
  | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
+ | mongodb | `pip install vectordb-bench[mongodb]` |

  ### Run

@@ -228,6 +233,47 @@ Options:
  with-gt]
  --help Show this message and exit.
  ```
+
+ ### Run awsopensearch from command line
+
+ ```shell
+ vectordbbench awsopensearch --db-label awsopensearch \
+ --m 16 --ef-construction 256 \
+ --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
+ --user vector --password '<password>' \
+ --case-type Performance1536D5M --num-insert-workers 10 \
+ --skip-load --num-concurrency 75
+ ```
+
+ To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
+
+ ```text
+ $ vectordbbench awsopensearch --help
+ Usage: vectordbbench awsopensearch [OPTIONS]
+
+ Options:
+ # Sharding and Replication
+ --number-of-shards INTEGER Number of primary shards for the index
+ --number-of-replicas INTEGER Number of replica copies for each primary
+ shard
+ # Indexing Performance
+ --index-thread-qty INTEGER Thread count for native engine indexing
+ --index-thread-qty-during-force-merge INTEGER
+ Thread count during force merge operations
+ --number-of-indexing-clients INTEGER
+ Number of concurrent indexing clients
+ # Index Management
+ --number-of-segments INTEGER Target number of segments after merging
+ --refresh-interval TEXT How often to make new data available for
+ search
+ --force-merge-enabled BOOLEAN Whether to perform force merge operation
+ --flush-threshold-size TEXT Size threshold for flushing the transaction
+ log
+ # Memory Management
+ --cb-threshold TEXT k-NN Memory circuit breaker threshold
+
+ --help Show this message and exit.```
+
  #### Using a configuration file.

  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -394,6 +440,13 @@ We have strict requirements for the data set format, please follow them.
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
  - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+ - We recommend limiting the number of test query vectors, like 1,000.
+ When conducting concurrent query tests, Vdbbench creates a large number of processes.
+ To minimize additional communication overhead during testing,
+ we prepare a complete set of test queries for each process, allowing them to run independently.
+ However, this means that as the number of concurrent processes increases,
+ the number of copied query vectors also increases significantly,
+ which can place substantial pressure on memory resources.
  - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.

  - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.

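The dataset-format requirements quoted above (`train.parquet` / `test.parquet` / `neighbors.parquet` with `id`, `emb`, and `neighbors_id` columns) can be produced with pandas. The snippet below is an illustrative sketch only, not part of the package: the dimension, row counts, output folder, and the random ground truth are placeholder assumptions (a real dataset would compute exact nearest neighbors).

```python
# Sketch: write a custom dataset in the layout the README describes.
from pathlib import Path

import numpy as np
import pandas as pd

dim, n_train, n_test, topk = 128, 10_000, 1_000, 100  # arbitrary sizes
rng = np.random.default_rng(0)
out = Path("my_dataset")
out.mkdir(exist_ok=True)

train = pd.DataFrame({
    "id": np.arange(n_train),
    "emb": list(rng.random((n_train, dim), dtype=np.float32)),
})
test = pd.DataFrame({
    "id": np.arange(n_test),
    "emb": list(rng.random((n_test, dim), dtype=np.float32)),
})
# Placeholder ground truth; compute real nearest neighbors for actual benchmarks.
neighbors = pd.DataFrame({
    "id": np.arange(n_test),
    "neighbors_id": [rng.choice(n_train, topk, replace=False) for _ in range(n_test)],
})

train.to_parquet(out / "train.parquet", index=False)
test.to_parquet(out / "test.parquet", index=False)
neighbors.to_parquet(out / "neighbors.parquet", index=False)
```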
{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/README.md
@@ -13,6 +13,8 @@ Closely mimicking real-world production environments, we've set up diverse testi

  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.

+ VectorDBBench is sponsered by Zilliz,the leading opensource vectorDB company behind Milvus. Choose smarter with VectorDBBench- start your free test on [zilliz cloud](https://zilliz.com/) today!
+
  **Leaderboard:** https://zilliz.com/benchmark
  ## Quick Start
  ### Prerequirement
@@ -52,6 +54,7 @@ All the database client supported
  | chromadb | `pip install vectordb-bench[chromadb]` |
  | awsopensearch | `pip install vectordb-bench[opensearch]` |
  | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
+ | mongodb | `pip install vectordb-bench[mongodb]` |

  ### Run

@@ -152,6 +155,47 @@ Options:
  with-gt]
  --help Show this message and exit.
  ```
+
+ ### Run awsopensearch from command line
+
+ ```shell
+ vectordbbench awsopensearch --db-label awsopensearch \
+ --m 16 --ef-construction 256 \
+ --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
+ --user vector --password '<password>' \
+ --case-type Performance1536D5M --num-insert-workers 10 \
+ --skip-load --num-concurrency 75
+ ```
+
+ To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
+
+ ```text
+ $ vectordbbench awsopensearch --help
+ Usage: vectordbbench awsopensearch [OPTIONS]
+
+ Options:
+ # Sharding and Replication
+ --number-of-shards INTEGER Number of primary shards for the index
+ --number-of-replicas INTEGER Number of replica copies for each primary
+ shard
+ # Indexing Performance
+ --index-thread-qty INTEGER Thread count for native engine indexing
+ --index-thread-qty-during-force-merge INTEGER
+ Thread count during force merge operations
+ --number-of-indexing-clients INTEGER
+ Number of concurrent indexing clients
+ # Index Management
+ --number-of-segments INTEGER Target number of segments after merging
+ --refresh-interval TEXT How often to make new data available for
+ search
+ --force-merge-enabled BOOLEAN Whether to perform force merge operation
+ --flush-threshold-size TEXT Size threshold for flushing the transaction
+ log
+ # Memory Management
+ --cb-threshold TEXT k-NN Memory circuit breaker threshold
+
+ --help Show this message and exit.```
+
  #### Using a configuration file.

  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -318,6 +362,13 @@ We have strict requirements for the data set format, please follow them.
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
  - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+ - We recommend limiting the number of test query vectors, like 1,000.
+ When conducting concurrent query tests, Vdbbench creates a large number of processes.
+ To minimize additional communication overhead during testing,
+ we prepare a complete set of test queries for each process, allowing them to run independently.
+ However, this means that as the number of concurrent processes increases,
+ the number of copied query vectors also increases significantly,
+ which can place substantial pressure on memory resources.
  - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.

  - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/install/requirements_py3.11.txt
@@ -1,4 +1,4 @@
- grpcio==1.53.0
+ grpcio==1.53.2
  grpcio-tools==1.53.0
  qdrant-client
  pinecone-client

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/install.py
@@ -1,7 +1,8 @@
- import os
  import argparse
+ import os
  import subprocess

+
  def docker_tag_base():
  return 'vdbbench'

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/pyproject.toml
@@ -35,7 +35,7 @@ dependencies = [
  "psutil",
  "polars",
  "plotly",
- "environs",
+ "environs<14.1.0",
  "pydantic<v2",
  "scikit-learn",
  "pymilvus", # with pandas, numpy, ujson
@@ -85,6 +85,7 @@ memorydb = [ "memorydb" ]
  chromadb = [ "chromadb" ]
  opensearch = [ "opensearch-py" ]
  aliyun_opensearch = [ "alibabacloud_ha3engine_vector", "alibabacloud_searchengine20211025"]
+ mongodb = [ "pymongo" ]

  [project.urls]
  "repository" = "https://github.com/zilliztech/VectorDBBench"
@@ -133,6 +134,7 @@ lint.ignore = [
  "RUF017",
  "C416",
  "PLW0603",
+ "COM812",
  ]

  # Allow autofix for all enabled rules (when `--fix`) is provided.
@@ -206,4 +208,3 @@ builtins-ignorelist = [
  # "dict", # TODO
  # "filter",
  ]
-

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/assembler.py
@@ -53,8 +53,8 @@ class Assembler:
  _ = k.init_cls

  # sort by dataset size
- for k, _ in db2runner:
- db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
+ for _, runner in db2runner.items():
+ runner.sort(key=lambda x: x.ca.dataset.data.size)

  all_runners = []
  all_runners.extend(load_runners)

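For context on the loop fix above: iterating a dict directly yields only its keys, so the removed `for k, _ in db2runner:` tried to tuple-unpack each key, whereas the new `for _, runner in db2runner.items():` iterates key/value pairs and sorts each runner list in place. A tiny standalone illustration (the dict contents are made up; the real code sorts by `x.ca.dataset.data.size`):

```python
# Made-up mapping standing in for db2runner: DB -> list of case runners.
db2runner = {"Milvus": [3, 1, 2], "PgVector": [9, 7]}

# Old form `for k, _ in db2runner:` iterates keys only, so Python tries to
# unpack each key itself into (k, _), which fails for these keys.

# New form from the diff: iterate key/value pairs, sort each value in place.
for _, runner in db2runner.items():
    runner.sort()

print(db2runner)  # {'Milvus': [1, 2, 3], 'PgVector': [7, 9]}
```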
{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/__init__.py
@@ -40,9 +40,10 @@ class DB(Enum):
  AliyunElasticsearch = "AliyunElasticsearch"
  Test = "test"
  AliyunOpenSearch = "AliyunOpenSearch"
+ MongoDB = "MongoDB"

  @property
- def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912
+ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
  """Import while in use"""
  if self == DB.Milvus:
  from .milvus.milvus import Milvus
@@ -129,11 +130,21 @@

  return AliyunOpenSearch

+ if self == DB.MongoDB:
+ from .mongodb.mongodb import MongoDB
+
+ return MongoDB
+
+ if self == DB.Test:
+ from .test.test import Test
+
+ return Test
+
  msg = f"Unknown DB: {self.name}"
  raise ValueError(msg)

  @property
- def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912
+ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901
  """Import while in use"""
  if self == DB.Milvus:
  from .milvus.config import MilvusConfig
@@ -220,6 +231,16 @@

  return AliyunOpenSearchConfig

+ if self == DB.MongoDB:
+ from .mongodb.config import MongoDBConfig
+
+ return MongoDBConfig
+
+ if self == DB.Test:
+ from .test.config import TestConfig
+
+ return TestConfig
+
  msg = f"Unknown DB: {self.name}"
  raise ValueError(msg)

@@ -292,6 +313,11 @@

  return AliyunOpenSearchIndexConfig

+ if self == DB.MongoDB:
+ from .mongodb.config import MongoDBIndexConfig
+
+ return MongoDBIndexConfig
+
  # DB.Pinecone, DB.Chroma, DB.Redis
  return EmptyDBCaseConfig

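A short usage sketch (not from the package source) of the lazy-import properties the MongoDB entries above hook into; it only touches names shown in these hunks, and the import of `pymongo` is deferred until the MongoDB client is actually selected.

```python
# Sketch: resolve the new MongoDB client and config classes via the DB enum.
from vectordb_bench.backend.clients import DB

client_cls = DB.MongoDB.init_cls    # lazily imports .mongodb.mongodb.MongoDB
config_cls = DB.MongoDB.config_cls  # lazily imports .mongodb.config.MongoDBConfig

print(client_cls.__name__, config_cls.__name__)  # MongoDB MongoDBConfig
```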
{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py
@@ -325,10 +325,7 @@ class AliyunOpenSearch(VectorDB):

  return False

- def optimize(self):
- pass
-
- def optimize_with_size(self, data_size: int):
+ def optimize(self, data_size: int):
  log.info(f"optimize count: {data_size}")
  retry_times = 0
  while True:
@@ -340,6 +337,3 @@
  if total_count == data_size:
  log.info("optimize table finish.")
  return
-
- def ready_to_load(self):
- """ready_to_load will be called before load in load cases."""

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/alloydb.py
@@ -149,10 +149,7 @@ class AlloyDB(VectorDB):
  )
  self.conn.commit()

- def ready_to_load(self):
- pass
-
- def optimize(self):
+ def optimize(self, data_size: int | None = None):
  self._post_insert()

  def _post_insert(self):

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/api.py
@@ -137,6 +137,13 @@ class VectorDB(ABC):
  @contextmanager
  def init(self) -> None:
  """create and destory connections to database.
+ Why contextmanager:
+
+ In multiprocessing search tasks, vectordbbench might init
+ totally hundreds of thousands of connections with DB server.
+
+ Too many connections may drain local FDs or server connection resources.
+ If the DB client doesn't have `close()` method, just set the object to None.

  Examples:
  >>> with self.init():
@@ -187,9 +194,8 @@
  """
  raise NotImplementedError

- # TODO: remove
  @abstractmethod
- def optimize(self):
+ def optimize(self, data_size: int | None = None):
  """optimize will be called between insertion and search in performance cases.

  Should be blocked until the vectorDB is ready to be tested on
@@ -199,16 +205,3 @@
  Optimize's execution time is limited, the limited time is based on cases.
  """
  raise NotImplementedError
-
- def optimize_with_size(self, data_size: int):
- self.optimize()
-
- # TODO: remove
- @abstractmethod
- def ready_to_load(self):
- """ready_to_load will be called before load in load cases.
-
- Should be blocked until the vectorDB is ready to be tested on
- heavy load cases.
- """
- raise NotImplementedError

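To make the interface change concrete: clients now implement a single `optimize(data_size)` hook, and the `optimize_with_size()` shim plus the abstract `ready_to_load()` are gone. The sketch below is hypothetical (it does not subclass the real ABC or talk to a database); it only mirrors the contract described in the hunks above.

```python
# Hypothetical client sketch following the updated VectorDB contract.
from contextlib import contextmanager


class DummyClient:
    @contextmanager
    def init(self):
        self.conn = object()   # stand-in for a real connection
        try:
            yield
        finally:
            self.conn = None   # drop per-process connection state, as the docstring advises

    def optimize(self, data_size: int | None = None):
        # Build/merge indexes and block until the store can serve searches;
        # data_size lets a client poll until all inserted rows are visible.
        pass
```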
{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py
@@ -12,6 +12,7 @@ log = logging.getLogger(__name__)

  WAITING_FOR_REFRESH_SEC = 30
  WAITING_FOR_FORCE_MERGE_SEC = 30
+ SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC = 30


  class AWSOpenSearch(VectorDB):
@@ -52,10 +53,27 @@ class AWSOpenSearch(VectorDB):
  return AWSOpenSearchIndexConfig

  def _create_index(self, client: OpenSearch):
+ cluster_settings_body = {
+ "persistent": {
+ "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
+ "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
+ }
+ }
+ client.cluster.put_settings(cluster_settings_body)
  settings = {
  "index": {
  "knn": True,
+ "number_of_shards": self.case_config.number_of_shards,
+ "number_of_replicas": 0,
+ "translog.flush_threshold_size": self.case_config.flush_threshold_size,
+ # Setting trans log threshold to 5GB
+ **(
+ {"knn.algo_param.ef_search": self.case_config.ef_search}
+ if self.case_config.engine == AWSOS_Engine.nmslib
+ else {}
+ ),
  },
+ "refresh_interval": self.case_config.refresh_interval,
  }
  mappings = {
  "properties": {
@@ -145,24 +163,49 @@ class AWSOpenSearch(VectorDB):
  docvalue_fields=[self.id_col_name],
  stored_fields="_none_",
  )
- log.info(f'Search took: {resp["took"]}')
- log.info(f'Search shards: {resp["_shards"]}')
- log.info(f'Search hits total: {resp["hits"]["total"]}')
+ log.debug(f"Search took: {resp['took']}")
+ log.debug(f"Search shards: {resp['_shards']}")
+ log.debug(f"Search hits total: {resp['hits']['total']}")
  return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
  except Exception as e:
  log.warning(f"Failed to search: {self.index_name} error: {e!s}")
  raise e from None

- def optimize(self):
+ def optimize(self, data_size: int | None = None):
  """optimize will be called between insertion and search in performance cases."""
  # Call refresh first to ensure that all segments are created
  self._refresh_index()
- self._do_force_merge()
+ if self.case_config.force_merge_enabled:
+ self._do_force_merge()
+ self._refresh_index()
+ self._update_replicas()
  # Call refresh again to ensure that the index is ready after force merge.
  self._refresh_index()
  # ensure that all graphs are loaded in memory and ready for search
  self._load_graphs_to_memory()

+ def _update_replicas(self):
+ index_settings = self.client.indices.get_settings(index=self.index_name)
+ current_number_of_replicas = int(index_settings[self.index_name]["settings"]["index"]["number_of_replicas"])
+ log.info(
+ f"Current Number of replicas are {current_number_of_replicas}"
+ f" and changing the replicas to {self.case_config.number_of_replicas}"
+ )
+ settings_body = {"index": {"number_of_replicas": self.case_config.number_of_replicas}}
+ self.client.indices.put_settings(index=self.index_name, body=settings_body)
+ self._wait_till_green()
+
+ def _wait_till_green(self):
+ log.info("Wait for index to become green..")
+ while True:
+ res = self.client.cat.indices(index=self.index_name, h="health", format="json")
+ health = res[0]["health"]
+ if health != "green":
+ break
+ log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
+ time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
+ log.info(f"Index {self.index_name} is green..")
+
  def _refresh_index(self):
  log.debug(f"Starting refresh for index {self.index_name}")
  while True:
@@ -179,6 +222,12 @@ class AWSOpenSearch(VectorDB):
  log.debug(f"Completed refresh for index {self.index_name}")

  def _do_force_merge(self):
+ log.info(f"Updating the Index thread qty to {self.case_config.index_thread_qty_during_force_merge}.")
+
+ cluster_settings_body = {
+ "persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
+ }
+ self.client.cluster.put_settings(cluster_settings_body)
  log.debug(f"Starting force merge for index {self.index_name}")
  force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
  force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
@@ -194,6 +243,3 @@ class AWSOpenSearch(VectorDB):
  log.info("Calling warmup API to load graphs into memory")
  warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
  self.client.transport.perform_request("GET", warmup_endpoint)
-
- def ready_to_load(self):
- """ready_to_load will be called before load in load cases."""

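Condensed, illustrative view of what the revised `optimize()` path above drives against the cluster: refresh, optional force merge with a raised `knn.algo_param.index_thread_qty`, re-enabling replicas, then the k-NN warmup call. The client setup, host, and the literal setting values below are assumptions; the endpoints and setting keys are the ones appearing in the hunks.

```python
# Sketch of the optimize() sequence as raw opensearch-py calls.
from opensearchpy import OpenSearch

client = OpenSearch(hosts=[{"host": "localhost", "port": 9200}])  # assumed endpoint
index = "vdb_bench_index"                                         # assumed index name

client.indices.refresh(index=index)
client.cluster.put_settings({"persistent": {"knn.algo_param.index_thread_qty": 8}})
client.transport.perform_request(
    "POST", f"/{index}/_forcemerge?max_num_segments=1&wait_for_completion=false"
)
client.indices.put_settings(index=index, body={"index": {"number_of_replicas": 1}})
client.transport.perform_request("GET", f"/_plugins/_knn/warmup/{index}")
```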
vectordb_bench-0.0.22/vectordb_bench/backend/clients/aws_opensearch/cli.py
@@ -0,0 +1,125 @@
+ from typing import Annotated, TypedDict, Unpack
+
+ import click
+ from pydantic import SecretStr
+
+ from ....cli.cli import (
+ CommonTypedDict,
+ HNSWFlavor2,
+ cli,
+ click_parameter_decorators_from_typed_dict,
+ run,
+ )
+ from .. import DB
+
+
+ class AWSOpenSearchTypedDict(TypedDict):
+ host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
+ port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
+ user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
+ password: Annotated[str, click.option("--password", type=str, help="Db password")]
+ number_of_shards: Annotated[
+ int,
+ click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
+ ]
+ number_of_replicas: Annotated[
+ int,
+ click.option(
+ "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
+ ),
+ ]
+ index_thread_qty: Annotated[
+ int,
+ click.option(
+ "--index-thread-qty",
+ type=int,
+ help="Thread count for native engine indexing",
+ default=4,
+ ),
+ ]
+
+ index_thread_qty_during_force_merge: Annotated[
+ int,
+ click.option(
+ "--index-thread-qty-during-force-merge",
+ type=int,
+ help="Thread count during force merge operations",
+ default=4,
+ ),
+ ]
+
+ number_of_indexing_clients: Annotated[
+ int,
+ click.option(
+ "--number-of-indexing-clients",
+ type=int,
+ help="Number of concurrent indexing clients",
+ default=1,
+ ),
+ ]
+
+ number_of_segments: Annotated[
+ int,
+ click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
+ ]
+
+ refresh_interval: Annotated[
+ int,
+ click.option(
+ "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
+ ),
+ ]
+
+ force_merge_enabled: Annotated[
+ int,
+ click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
+ ]
+
+ flush_threshold_size: Annotated[
+ int,
+ click.option(
+ "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
+ ),
+ ]
+
+ cb_threshold: Annotated[
+ int,
+ click.option(
+ "--cb-threshold",
+ type=str,
+ help="k-NN Memory circuit breaker threshold",
+ default="50%",
+ ),
+ ]
+
+
+ class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
+
+
+ @cli.command()
+ @click_parameter_decorators_from_typed_dict(AWSOpenSearchHNSWTypedDict)
+ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
+ from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
+
+ run(
+ db=DB.AWSOpenSearch,
+ db_config=AWSOpenSearchConfig(
+ host=parameters["host"],
+ port=parameters["port"],
+ user=parameters["user"],
+ password=SecretStr(parameters["password"]),
+ ),
+ db_case_config=AWSOpenSearchIndexConfig(
+ number_of_shards=parameters["number_of_shards"],
+ number_of_replicas=parameters["number_of_replicas"],
+ index_thread_qty=parameters["index_thread_qty"],
+ number_of_segments=parameters["number_of_segments"],
+ refresh_interval=parameters["refresh_interval"],
+ force_merge_enabled=parameters["force_merge_enabled"],
+ flush_threshold_size=parameters["flush_threshold_size"],
+ number_of_indexing_clients=parameters["number_of_indexing_clients"],
+ index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
+ cb_threshold=parameters["cb_threshold"],
+ ),
+ **parameters,
+ )

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/config.py
@@ -39,6 +39,16 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
  efConstruction: int = 256
  efSearch: int = 256
  M: int = 16
+ index_thread_qty: int | None = 4
+ number_of_shards: int | None = 1
+ number_of_replicas: int | None = 0
+ number_of_segments: int | None = 1
+ refresh_interval: str | None = "60s"
+ force_merge_enabled: bool | None = True
+ flush_threshold_size: str | None = "5120mb"
+ number_of_indexing_clients: int | None = 1
+ index_thread_qty_during_force_merge: int
+ cb_threshold: str | None = "50%"

  def parse_metric(self) -> str:
  if self.metric_type == MetricType.IP:

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/chroma/chroma.py
@@ -57,10 +57,7 @@ class ChromaClient(VectorDB):
  def ready_to_search(self) -> bool:
  pass

- def ready_to_load(self) -> bool:
- pass
-
- def optimize(self) -> None:
+ def optimize(self, data_size: int | None = None):
  pass

  def insert_embeddings(

{vectordb_bench-0.0.20 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py
@@ -143,7 +143,7 @@ class ElasticCloud(VectorDB):
  log.warning(f"Failed to search: {self.indice} error: {e!s}")
  raise e from None

- def optimize(self):
+ def optimize(self, data_size: int | None = None):
  """optimize will be called between insertion and search in performance cases."""
  assert self.client is not None, "should self.init() first"
  self.client.indices.refresh(index=self.indice)
@@ -158,6 +158,3 @@
  task_status = self.client.tasks.get(task_id=force_merge_task_id)
  if task_status["completed"]:
  return
-
- def ready_to_load(self):
- """ready_to_load will be called before load in load cases."""