vectordb-bench 0.0.21__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.gitignore +3 -1
  2. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/PKG-INFO +55 -2
  3. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/README.md +51 -0
  4. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/install/requirements_py3.11.txt +1 -1
  5. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/install.py +2 -1
  6. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/pyproject.toml +2 -2
  7. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/__init__.py +16 -0
  8. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +53 -4
  9. vectordb_bench-0.0.22/vectordb_bench/backend/clients/aws_opensearch/cli.py +125 -0
  10. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
  11. vectordb_bench-0.0.22/vectordb_bench/backend/clients/mongodb/config.py +53 -0
  12. vectordb_bench-0.0.22/vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
  13. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
  14. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
  15. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/dbCaseConfigs.py +32 -0
  16. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/log_util.py +15 -2
  17. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/models.py +4 -0
  18. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/PKG-INFO +55 -2
  19. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/SOURCES.txt +2 -0
  20. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/requires.txt +4 -1
  21. vectordb_bench-0.0.21/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -41
  22. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.devcontainer/Dockerfile +0 -0
  23. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.devcontainer/devcontainer.json +0 -0
  24. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.env.example +0 -0
  25. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.github/workflows/publish_package_on_release.yml +0 -0
  26. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/.github/workflows/pull_request.yml +0 -0
  27. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/Dockerfile +0 -0
  28. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/LICENSE +0 -0
  29. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/Makefile +0 -0
  30. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/OWNERS +0 -0
  31. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/fig/custom_case_run_test.png +0 -0
  32. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/fig/custom_dataset.png +0 -0
  33. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/setup.cfg +0 -0
  34. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/conftest.py +0 -0
  35. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/pytest.ini +0 -0
  36. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_bench_runner.py +0 -0
  37. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_chroma.py +0 -0
  38. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_data_source.py +0 -0
  39. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_dataset.py +0 -0
  40. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_elasticsearch_cloud.py +0 -0
  41. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_models.py +0 -0
  42. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_rate_runner.py +0 -0
  43. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_redis.py +0 -0
  44. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/test_utils.py +0 -0
  45. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/tests/ut_cases.py +0 -0
  46. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/__init__.py +0 -0
  47. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/__main__.py +0 -0
  48. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/__init__.py +0 -0
  49. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/assembler.py +0 -0
  50. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/cases.py +0 -0
  51. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  52. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  53. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
  54. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  55. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  56. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  57. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  58. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/api.py +0 -0
  59. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  60. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
  61. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  62. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
  63. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
  64. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  65. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  66. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
  67. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/cli.py +0 -0
  68. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/config.py +0 -0
  69. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/milvus/milvus.py +0 -0
  70. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  71. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  72. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
  73. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  74. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  75. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  76. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  77. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  78. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  79. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  80. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  81. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  82. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  83. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
  84. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
  85. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
  86. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  87. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/config.py +0 -0
  88. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  89. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/cli.py +0 -0
  90. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/config.py +0 -0
  91. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/test/test.py +0 -0
  92. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  93. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  94. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  95. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  96. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  97. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  98. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/data_source.py +0 -0
  99. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/dataset.py +0 -0
  100. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/result_collector.py +0 -0
  101. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/__init__.py +0 -0
  102. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/mp_runner.py +0 -0
  103. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/rate_runner.py +0 -0
  104. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/read_write_runner.py +0 -0
  105. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/serial_runner.py +0 -0
  106. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/runner/util.py +0 -0
  107. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/task_runner.py +0 -0
  108. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/backend/utils.py +0 -0
  109. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/base.py +0 -0
  110. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/cli/__init__.py +0 -0
  111. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/cli/cli.py +0 -0
  112. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/cli/vectordbbench.py +0 -0
  113. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/config-files/sample_config.yml +0 -0
  114. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/custom/custom_case.json +0 -0
  115. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  116. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/data.py +0 -0
  117. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  118. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/filters.py +0 -0
  119. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  120. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
  121. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
  122. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  123. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  124. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  125. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -0
  126. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/getCustomConfig.py +0 -0
  127. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  128. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  129. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  130. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -0
  131. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  132. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/dbSelector.py +0 -0
  133. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  134. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  135. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  136. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/components/tables/data.py +0 -0
  137. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  138. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/config/styles.py +0 -0
  139. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/concurrent.py +0 -0
  140. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/custom.py +0 -0
  141. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/quries_per_dollar.py +0 -0
  142. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/run_test.py +0 -0
  143. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/pages/tables.py +0 -0
  144. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/utils.py +0 -0
  145. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/frontend/vdb_benchmark.py +0 -0
  146. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/interface.py +0 -0
  147. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/metric.py +0 -0
  148. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  149. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  150. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  151. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  152. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  153. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  154. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  155. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  156. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  157. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  158. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  159. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  160. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
  161. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
  162. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
  163. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/dbPrices.json +0 -0
  164. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/getLeaderboardData.py +0 -0
  165. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench/results/leaderboard.json +0 -0
  166. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  167. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/entry_points.txt +0 -0
  168. {vectordb_bench-0.0.21 → vectordb_bench-0.0.22}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -8,5 +8,7 @@ __MACOSX
8
8
  .DS_Store
9
9
  build/
10
10
  venv/
11
+ .venv/
11
12
  .idea/
12
- results/
13
+ results/
14
+ logs/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: vectordb-bench
3
- Version: 0.0.21
3
+ Version: 0.0.22
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: polars
23
23
  Requires-Dist: plotly
24
- Requires-Dist: environs
24
+ Requires-Dist: environs<14.1.0
25
25
  Requires-Dist: pydantic<v2
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: pymilvus
@@ -73,6 +73,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
73
73
  Provides-Extra: aliyun-opensearch
74
74
  Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
75
75
  Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
76
+ Provides-Extra: mongodb
77
+ Requires-Dist: pymongo; extra == "mongodb"
76
78
 
77
79
  # VectorDBBench: A Benchmark Tool for VectorDB
78
80
 
@@ -89,6 +91,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
89
91
 
90
92
  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
91
93
 
94
+ VectorDBBench is sponsored by Zilliz, the leading open-source vectorDB company behind Milvus. Choose smarter with VectorDBBench - start your free test on [zilliz cloud](https://zilliz.com/) today!
95
+
92
96
  **Leaderboard:** https://zilliz.com/benchmark
93
97
  ## Quick Start
94
98
  ### Prerequirement
@@ -128,6 +132,7 @@ All the database client supported
128
132
  | chromadb | `pip install vectordb-bench[chromadb]` |
129
133
  | awsopensearch | `pip install vectordb-bench[opensearch]` |
130
134
  | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
135
+ | mongodb | `pip install vectordb-bench[mongodb]` |
131
136
 
132
137
  ### Run
133
138
 
@@ -228,6 +233,47 @@ Options:
228
233
  with-gt]
229
234
  --help Show this message and exit.
230
235
  ```
236
+
237
+ ### Run awsopensearch from command line
238
+
239
+ ```shell
240
+ vectordbbench awsopensearch --db-label awsopensearch \
241
+ --m 16 --ef-construction 256 \
242
+ --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
243
+ --user vector --password '<password>' \
244
+ --case-type Performance1536D5M --num-insert-workers 10 \
245
+ --skip-load --num-concurrency 75
246
+ ```
247
+
248
+ To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
249
+
250
+ ```text
251
+ $ vectordbbench awsopensearch --help
252
+ Usage: vectordbbench awsopensearch [OPTIONS]
253
+
254
+ Options:
255
+ # Sharding and Replication
256
+ --number-of-shards INTEGER Number of primary shards for the index
257
+ --number-of-replicas INTEGER Number of replica copies for each primary
258
+ shard
259
+ # Indexing Performance
260
+ --index-thread-qty INTEGER Thread count for native engine indexing
261
+ --index-thread-qty-during-force-merge INTEGER
262
+ Thread count during force merge operations
263
+ --number-of-indexing-clients INTEGER
264
+ Number of concurrent indexing clients
265
+ # Index Management
266
+ --number-of-segments INTEGER Target number of segments after merging
267
+ --refresh-interval TEXT How often to make new data available for
268
+ search
269
+ --force-merge-enabled BOOLEAN Whether to perform force merge operation
270
+ --flush-threshold-size TEXT Size threshold for flushing the transaction
271
+ log
272
+ # Memory Management
273
+ --cb-threshold TEXT k-NN Memory circuit breaker threshold
274
+
275
+ --help Show this message and exit.```
276
+
231
277
  #### Using a configuration file.
232
278
 
233
279
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -394,6 +440,13 @@ We have strict requirements for the data set format, please follow them.
394
440
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
395
441
  - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
396
442
  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
443
+ - We recommend limiting the number of test query vectors, e.g. to around 1,000.
444
+ When conducting concurrent query tests, Vdbbench creates a large number of processes.
445
+ To minimize additional communication overhead during testing,
446
+ we prepare a complete set of test queries for each process, allowing them to run independently.
447
+ However, this means that as the number of concurrent processes increases,
448
+ the number of copied query vectors also increases significantly,
449
+ which can place substantial pressure on memory resources.
397
450
  - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
398
451
 
399
452
  - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
@@ -13,6 +13,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
13
13
 
14
14
  Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
15
15
 
16
+ VectorDBBench is sponsored by Zilliz, the leading open-source vectorDB company behind Milvus. Choose smarter with VectorDBBench - start your free test on [zilliz cloud](https://zilliz.com/) today!
17
+
16
18
  **Leaderboard:** https://zilliz.com/benchmark
17
19
  ## Quick Start
18
20
  ### Prerequirement
@@ -52,6 +54,7 @@ All the database client supported
52
54
  | chromadb | `pip install vectordb-bench[chromadb]` |
53
55
  | awsopensearch | `pip install vectordb-bench[opensearch]` |
54
56
  | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
57
+ | mongodb | `pip install vectordb-bench[mongodb]` |
55
58
 
56
59
  ### Run
57
60
 
@@ -152,6 +155,47 @@ Options:
152
155
  with-gt]
153
156
  --help Show this message and exit.
154
157
  ```
158
+
159
+ ### Run awsopensearch from command line
160
+
161
+ ```shell
162
+ vectordbbench awsopensearch --db-label awsopensearch \
163
+ --m 16 --ef-construction 256 \
164
+ --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
165
+ --user vector --password '<password>' \
166
+ --case-type Performance1536D5M --num-insert-workers 10 \
167
+ --skip-load --num-concurrency 75
168
+ ```
169
+
170
+ To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
171
+
172
+ ```text
173
+ $ vectordbbench awsopensearch --help
174
+ Usage: vectordbbench awsopensearch [OPTIONS]
175
+
176
+ Options:
177
+ # Sharding and Replication
178
+ --number-of-shards INTEGER Number of primary shards for the index
179
+ --number-of-replicas INTEGER Number of replica copies for each primary
180
+ shard
181
+ # Indexing Performance
182
+ --index-thread-qty INTEGER Thread count for native engine indexing
183
+ --index-thread-qty-during-force-merge INTEGER
184
+ Thread count during force merge operations
185
+ --number-of-indexing-clients INTEGER
186
+ Number of concurrent indexing clients
187
+ # Index Management
188
+ --number-of-segments INTEGER Target number of segments after merging
189
+ --refresh-interval TEXT How often to make new data available for
190
+ search
191
+ --force-merge-enabled BOOLEAN Whether to perform force merge operation
192
+ --flush-threshold-size TEXT Size threshold for flushing the transaction
193
+ log
194
+ # Memory Management
195
+ --cb-threshold TEXT k-NN Memory circuit breaker threshold
196
+
197
+ --help Show this message and exit.```
198
+
155
199
  #### Using a configuration file.
156
200
 
157
201
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -318,6 +362,13 @@ We have strict requirements for the data set format, please follow them.
318
362
  - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
319
363
  - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
320
364
  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
365
+ - We recommend limiting the number of test query vectors, e.g. to around 1,000.
366
+ When conducting concurrent query tests, Vdbbench creates a large number of processes.
367
+ To minimize additional communication overhead during testing,
368
+ we prepare a complete set of test queries for each process, allowing them to run independently.
369
+ However, this means that as the number of concurrent processes increases,
370
+ the number of copied query vectors also increases significantly,
371
+ which can place substantial pressure on memory resources.
321
372
  - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
322
373
 
323
374
  - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
@@ -1,4 +1,4 @@
1
- grpcio==1.53.0
1
+ grpcio==1.53.2
2
2
  grpcio-tools==1.53.0
3
3
  qdrant-client
4
4
  pinecone-client
@@ -1,7 +1,8 @@
1
- import os
2
1
  import argparse
2
+ import os
3
3
  import subprocess
4
4
 
5
+
5
6
  def docker_tag_base():
6
7
  return 'vdbbench'
7
8
 
@@ -35,7 +35,7 @@ dependencies = [
35
35
  "psutil",
36
36
  "polars",
37
37
  "plotly",
38
- "environs",
38
+ "environs<14.1.0",
39
39
  "pydantic<v2",
40
40
  "scikit-learn",
41
41
  "pymilvus", # with pandas, numpy, ujson
@@ -85,6 +85,7 @@ memorydb = [ "memorydb" ]
85
85
  chromadb = [ "chromadb" ]
86
86
  opensearch = [ "opensearch-py" ]
87
87
  aliyun_opensearch = [ "alibabacloud_ha3engine_vector", "alibabacloud_searchengine20211025"]
88
+ mongodb = [ "pymongo" ]
88
89
 
89
90
  [project.urls]
90
91
  "repository" = "https://github.com/zilliztech/VectorDBBench"
@@ -207,4 +208,3 @@ builtins-ignorelist = [
207
208
  # "dict", # TODO
208
209
  # "filter",
209
210
  ]
210
-
@@ -40,6 +40,7 @@ class DB(Enum):
40
40
  AliyunElasticsearch = "AliyunElasticsearch"
41
41
  Test = "test"
42
42
  AliyunOpenSearch = "AliyunOpenSearch"
43
+ MongoDB = "MongoDB"
43
44
 
44
45
  @property
45
46
  def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901
@@ -129,6 +130,11 @@ class DB(Enum):
129
130
 
130
131
  return AliyunOpenSearch
131
132
 
133
+ if self == DB.MongoDB:
134
+ from .mongodb.mongodb import MongoDB
135
+
136
+ return MongoDB
137
+
132
138
  if self == DB.Test:
133
139
  from .test.test import Test
134
140
 
@@ -225,6 +231,11 @@ class DB(Enum):
225
231
 
226
232
  return AliyunOpenSearchConfig
227
233
 
234
+ if self == DB.MongoDB:
235
+ from .mongodb.config import MongoDBConfig
236
+
237
+ return MongoDBConfig
238
+
228
239
  if self == DB.Test:
229
240
  from .test.config import TestConfig
230
241
 
@@ -302,6 +313,11 @@ class DB(Enum):
302
313
 
303
314
  return AliyunOpenSearchIndexConfig
304
315
 
316
+ if self == DB.MongoDB:
317
+ from .mongodb.config import MongoDBIndexConfig
318
+
319
+ return MongoDBIndexConfig
320
+
305
321
  # DB.Pinecone, DB.Chroma, DB.Redis
306
322
  return EmptyDBCaseConfig
307
323
 
@@ -12,6 +12,7 @@ log = logging.getLogger(__name__)
12
12
 
13
13
  WAITING_FOR_REFRESH_SEC = 30
14
14
  WAITING_FOR_FORCE_MERGE_SEC = 30
15
+ SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC = 30
15
16
 
16
17
 
17
18
  class AWSOpenSearch(VectorDB):
@@ -52,10 +53,27 @@ class AWSOpenSearch(VectorDB):
52
53
  return AWSOpenSearchIndexConfig
53
54
 
54
55
  def _create_index(self, client: OpenSearch):
56
+ cluster_settings_body = {
57
+ "persistent": {
58
+ "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
59
+ "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
60
+ }
61
+ }
62
+ client.cluster.put_settings(cluster_settings_body)
55
63
  settings = {
56
64
  "index": {
57
65
  "knn": True,
66
+ "number_of_shards": self.case_config.number_of_shards,
67
+ "number_of_replicas": 0,
68
+ "translog.flush_threshold_size": self.case_config.flush_threshold_size,
69
+ # Setting trans log threshold to 5GB
70
+ **(
71
+ {"knn.algo_param.ef_search": self.case_config.ef_search}
72
+ if self.case_config.engine == AWSOS_Engine.nmslib
73
+ else {}
74
+ ),
58
75
  },
76
+ "refresh_interval": self.case_config.refresh_interval,
59
77
  }
60
78
  mappings = {
61
79
  "properties": {
@@ -145,9 +163,9 @@ class AWSOpenSearch(VectorDB):
145
163
  docvalue_fields=[self.id_col_name],
146
164
  stored_fields="_none_",
147
165
  )
148
- log.info(f"Search took: {resp['took']}")
149
- log.info(f"Search shards: {resp['_shards']}")
150
- log.info(f"Search hits total: {resp['hits']['total']}")
166
+ log.debug(f"Search took: {resp['took']}")
167
+ log.debug(f"Search shards: {resp['_shards']}")
168
+ log.debug(f"Search hits total: {resp['hits']['total']}")
151
169
  return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
152
170
  except Exception as e:
153
171
  log.warning(f"Failed to search: {self.index_name} error: {e!s}")
@@ -157,12 +175,37 @@ class AWSOpenSearch(VectorDB):
157
175
  """optimize will be called between insertion and search in performance cases."""
158
176
  # Call refresh first to ensure that all segments are created
159
177
  self._refresh_index()
160
- self._do_force_merge()
178
+ if self.case_config.force_merge_enabled:
179
+ self._do_force_merge()
180
+ self._refresh_index()
181
+ self._update_replicas()
161
182
  # Call refresh again to ensure that the index is ready after force merge.
162
183
  self._refresh_index()
163
184
  # ensure that all graphs are loaded in memory and ready for search
164
185
  self._load_graphs_to_memory()
165
186
 
187
def _update_replicas(self):
    """Set the index replica count to the configured value and wait for green health.

    Reads the current ``number_of_replicas`` from the index settings (for
    logging only), applies ``case_config.number_of_replicas`` through the
    index settings API, then delegates to ``_wait_till_green``.
    """
    index = self.index_name
    live_settings = self.client.indices.get_settings(index=index)
    replicas_now = int(live_settings[index]["settings"]["index"]["number_of_replicas"])
    replicas_wanted = self.case_config.number_of_replicas
    log.info(f"Current Number of replicas are {replicas_now} and changing the replicas to {replicas_wanted}")
    self.client.indices.put_settings(
        index=index,
        body={"index": {"number_of_replicas": replicas_wanted}},
    )
    self._wait_till_green()
197
+
198
def _wait_till_green(self):
    """Poll the index health until it reports ``green``.

    Queries the cat-indices API for the ``health`` column of
    ``self.index_name`` and sleeps
    ``SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC`` seconds between
    attempts. Returns only once the reported health is ``"green"``.
    """
    log.info("Wait for index to become green..")
    while True:
        res = self.client.cat.indices(index=self.index_name, h="health", format="json")
        health = res[0]["health"]
        # Stop polling only when the index IS green. The previous check was
        # inverted: it returned immediately for yellow/red and spun forever
        # once the index had actually turned green.
        if health == "green":
            break
        log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
        time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
    log.info(f"Index {self.index_name} is green..")
208
+
166
209
  def _refresh_index(self):
167
210
  log.debug(f"Starting refresh for index {self.index_name}")
168
211
  while True:
@@ -179,6 +222,12 @@ class AWSOpenSearch(VectorDB):
179
222
  log.debug(f"Completed refresh for index {self.index_name}")
180
223
 
181
224
  def _do_force_merge(self):
225
+ log.info(f"Updating the Index thread qty to {self.case_config.index_thread_qty_during_force_merge}.")
226
+
227
+ cluster_settings_body = {
228
+ "persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
229
+ }
230
+ self.client.cluster.put_settings(cluster_settings_body)
182
231
  log.debug(f"Starting force merge for index {self.index_name}")
183
232
  force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
184
233
  force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
@@ -0,0 +1,125 @@
1
+ from typing import Annotated, TypedDict, Unpack
2
+
3
+ import click
4
+ from pydantic import SecretStr
5
+
6
+ from ....cli.cli import (
7
+ CommonTypedDict,
8
+ HNSWFlavor2,
9
+ cli,
10
+ click_parameter_decorators_from_typed_dict,
11
+ run,
12
+ )
13
+ from .. import DB
14
+
15
+
16
class AWSOpenSearchTypedDict(TypedDict):
    """Command-line options accepted by the AWS OpenSearch benchmark command.

    Every key carries the ``click.option`` that produces it. The first
    ``Annotated`` argument is the Python type of the parsed value and is kept
    in agreement with the option's ``type=`` and ``default=``.
    """

    # --- connection ---
    host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
    port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
    user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
    password: Annotated[str, click.option("--password", type=str, help="Db password")]

    # --- index layout ---
    number_of_shards: Annotated[
        int,
        click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
    ]
    number_of_replicas: Annotated[
        int,
        click.option(
            "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
        ),
    ]

    # --- indexing / merge tuning ---
    index_thread_qty: Annotated[
        int,
        click.option(
            "--index-thread-qty",
            type=int,
            help="Thread count for native engine indexing",
            default=4,
        ),
    ]

    index_thread_qty_during_force_merge: Annotated[
        int,
        click.option(
            "--index-thread-qty-during-force-merge",
            type=int,
            help="Thread count during force merge operations",
            default=4,
        ),
    ]

    number_of_indexing_clients: Annotated[
        int,
        click.option(
            "--number-of-indexing-clients",
            type=int,
            help="Number of concurrent indexing clients",
            default=1,
        ),
    ]

    number_of_segments: Annotated[
        int,
        click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
    ]

    # Parsed as a string such as "60s" (was mis-annotated as int).
    refresh_interval: Annotated[
        str,
        click.option(
            "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
        ),
    ]

    # Parsed as a boolean (was mis-annotated as int).
    force_merge_enabled: Annotated[
        bool,
        click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
    ]

    # Parsed as a size string such as "5120mb" (was mis-annotated as int).
    flush_threshold_size: Annotated[
        str,
        click.option(
            "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
        ),
    ]

    # Parsed as a percentage string such as "50%" (was mis-annotated as int).
    cb_threshold: Annotated[
        str,
        click.option(
            "--cb-threshold",
            type=str,
            help="k-NN Memory circuit breaker threshold",
            default="50%",
        ),
    ]
94
+
95
+
96
# Full option set for the AWSOpenSearch command: the shared benchmark options,
# the AWS OpenSearch specific options, and the HNSWFlavor2 options merged into
# one TypedDict. It adds no keys of its own.
class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
97
+
98
+
99
@cli.command()
@click_parameter_decorators_from_typed_dict(AWSOpenSearchHNSWTypedDict)
def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
    # NOTE: intentionally no docstring here; click would surface it as the
    # command's help text.
    # Config classes are imported lazily, inside the command body.
    from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig

    # Connection settings for the target cluster.
    connection = AWSOpenSearchConfig(
        host=parameters["host"],
        port=parameters["port"],
        user=parameters["user"],
        password=SecretStr(parameters["password"]),
    )

    # Options forwarded one-to-one (same name) into the index/case config.
    index_option_names = (
        "number_of_shards",
        "number_of_replicas",
        "index_thread_qty",
        "number_of_segments",
        "refresh_interval",
        "force_merge_enabled",
        "flush_threshold_size",
        "number_of_indexing_clients",
        "index_thread_qty_during_force_merge",
        "cb_threshold",
    )
    case_config = AWSOpenSearchIndexConfig(**{name: parameters[name] for name in index_option_names})

    run(
        db=DB.AWSOpenSearch,
        db_config=connection,
        db_case_config=case_config,
        **parameters,
    )
@@ -39,6 +39,16 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
39
39
  efConstruction: int = 256
40
40
  efSearch: int = 256
41
41
  M: int = 16
42
+ index_thread_qty: int | None = 4
43
+ number_of_shards: int | None = 1
44
+ number_of_replicas: int | None = 0
45
+ number_of_segments: int | None = 1
46
+ refresh_interval: str | None = "60s"
47
+ force_merge_enabled: bool | None = True
48
+ flush_threshold_size: str | None = "5120mb"
49
+ number_of_indexing_clients: int | None = 1
50
+ index_thread_qty_during_force_merge: int
51
+ cb_threshold: str | None = "50%"
42
52
 
43
53
  def parse_metric(self) -> str:
44
54
  if self.metric_type == MetricType.IP:
@@ -0,0 +1,53 @@
1
+ from enum import Enum
2
+
3
+ from pydantic import BaseModel, SecretStr
4
+
5
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
6
+
7
+
8
class QuantizationType(Enum):
    """Quantization setting written into the MongoDB vector index definition.

    The member value is the string placed under the ``quantization`` key of
    the index field definition.
    """

    # No quantization.
    NONE = "none"

    # Binary quantization.
    BINARY = "binary"

    # Scalar quantization.
    SCALAR = "scalar"
12
+
13
+
14
class MongoDBConfig(DBConfig, BaseModel):
    """Connection settings for the MongoDB benchmark client.

    Attributes are documented inline. ``to_dict`` exposes the settings as a
    plain dictionary with the secret unwrapped.
    """

    # Placeholder connection URI. Wrapped in SecretStr explicitly: pydantic
    # does not validate/coerce field defaults unless asked to, so a bare str
    # default would make `to_dict()` fail on `.get_secret_value()`.
    connection_string: SecretStr = SecretStr("mongodb+srv://<user>:<password>@<cluster_name>.heatl.mongodb.net")
    # Name of the database used by the benchmark.
    database: str = "vdb_bench"

    def to_dict(self) -> dict:
        """Return the connection settings as a dict, with the secret revealed."""
        return {
            "connection_string": self.connection_string.get_secret_value(),
            "database": self.database,
        }
23
+
24
+
25
class MongoDBIndexConfig(BaseModel, DBCaseConfig):
    """Index and search parameters for the MongoDB vector-search benchmark case."""

    # Index type reported for this case; defaults to HNSW.
    index: IndexType = IndexType.HNSW
    # Benchmark metric; translated to a MongoDB similarity name by parse_metric().
    metric_type: MetricType = MetricType.COSINE
    # Returned as-is by search_param() under "num_candidates_ratio".
    num_candidates_ratio: int = 10
    # Quantization setting written into the index definition.
    quantization: QuantizationType = QuantizationType.NONE

    def parse_metric(self) -> str:
        """Translate ``metric_type`` into the similarity name used in the index definition.

        L2 maps to ``"euclidean"``, IP to ``"dotProduct"``; anything else
        falls back to ``"cosine"``.
        """
        metric = self.metric_type
        if metric == MetricType.L2:
            return "euclidean"
        return "dotProduct" if metric == MetricType.IP else "cosine"

    def index_param(self) -> dict:
        """Build the ``vectorSearch`` index definition for the ``vector`` field.

        ``numDimensions`` is left as ``None`` here; it is expected to be
        filled in by the MongoDB client class before the index is created.
        """
        vector_field = {
            "type": "vector",
            "similarity": self.parse_metric(),
            "numDimensions": None,
            "path": "vector",
            "quantization": self.quantization.value,
        }
        return {"type": "vectorSearch", "fields": [vector_field]}

    def search_param(self) -> dict:
        """Return the search-time parameters (only the candidates ratio)."""
        ratio = self.num_candidates_ratio
        return {"num_candidates_ratio": ratio}