vectordb-bench 0.0.29__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (234)
  1. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.github/workflows/pull_request.yml +1 -0
  2. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/PKG-INFO +131 -32
  3. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/README.md +126 -30
  4. vectordb_bench-1.0.0/fig/homepage/bar-chart.png +0 -0
  5. vectordb_bench-1.0.0/fig/homepage/concurrent.png +0 -0
  6. vectordb_bench-1.0.0/fig/homepage/custom.png +0 -0
  7. vectordb_bench-1.0.0/fig/homepage/label_filter.png +0 -0
  8. vectordb_bench-1.0.0/fig/homepage/qp$.png +0 -0
  9. vectordb_bench-1.0.0/fig/homepage/run_test.png +0 -0
  10. vectordb_bench-1.0.0/fig/homepage/streaming.png +0 -0
  11. vectordb_bench-1.0.0/fig/homepage/table.png +0 -0
  12. vectordb_bench-1.0.0/fig/run_test_select_case.png +0 -0
  13. vectordb_bench-1.0.0/fig/run_test_select_db.png +0 -0
  14. vectordb_bench-1.0.0/fig/run_test_submit.png +0 -0
  15. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/install/requirements_py3.11.txt +1 -0
  16. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/pyproject.toml +3 -1
  17. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/__init__.py +14 -27
  18. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/assembler.py +19 -6
  19. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/cases.py +186 -23
  20. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/__init__.py +32 -0
  21. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/api.py +22 -1
  22. vectordb_bench-1.0.0/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +451 -0
  23. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
  24. vectordb_bench-1.0.0/vectordb_bench/backend/clients/aws_opensearch/config.py +120 -0
  25. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/chroma.py +6 -2
  26. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
  27. vectordb_bench-1.0.0/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +248 -0
  28. vectordb_bench-1.0.0/vectordb_bench/backend/clients/lancedb/cli.py +146 -0
  29. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/config.py +14 -1
  30. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
  31. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
  32. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/cli.py +30 -9
  33. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/config.py +3 -0
  34. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/milvus.py +81 -23
  35. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
  36. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/config.py +125 -0
  37. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
  38. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
  39. vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_cloud/config.py +96 -0
  40. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
  41. vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
  42. vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
  43. vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
  44. vectordb_bench-1.0.0/vectordb_bench/backend/clients/weaviate_cloud/cli.py +66 -0
  45. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
  46. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
  47. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/dataset.py +143 -27
  48. vectordb_bench-1.0.0/vectordb_bench/backend/filter.py +76 -0
  49. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/__init__.py +3 -3
  50. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/mp_runner.py +52 -39
  51. vectordb_bench-1.0.0/vectordb_bench/backend/runner/rate_runner.py +123 -0
  52. vectordb_bench-1.0.0/vectordb_bench/backend/runner/read_write_runner.py +259 -0
  53. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/serial_runner.py +56 -23
  54. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/task_runner.py +48 -20
  55. vectordb_bench-1.0.0/vectordb_bench/cli/batch_cli.py +121 -0
  56. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/cli.py +59 -1
  57. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/vectordbbench.py +7 -0
  58. vectordb_bench-1.0.0/vectordb_bench/config-files/batch_sample_config.yml +17 -0
  59. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/data.py +16 -11
  60. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/filters.py +53 -25
  61. vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/headerIcon.py +25 -0
  62. vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/nav.py +42 -0
  63. vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displayCustomCase.py +72 -0
  64. vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displaypPrams.py +29 -0
  65. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
  66. vectordb_bench-1.0.0/vectordb_bench/frontend/components/label_filter/charts.py +60 -0
  67. vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/caseSelector.py +111 -0
  68. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
  69. vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
  70. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
  71. vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/charts.py +253 -0
  72. vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/data.py +62 -0
  73. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/tables/data.py +1 -1
  74. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
  75. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
  76. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
  77. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
  78. vectordb_bench-1.0.0/vectordb_bench/frontend/config/styles.py +99 -0
  79. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/concurrent.py +5 -1
  80. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/custom.py +4 -0
  81. vectordb_bench-1.0.0/vectordb_bench/frontend/pages/label_filter.py +56 -0
  82. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
  83. vectordb_bench-0.0.29/vectordb_bench/frontend/vdb_benchmark.py → vectordb_bench-1.0.0/vectordb_bench/frontend/pages/results.py +5 -1
  84. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/run_test.py +3 -3
  85. vectordb_bench-1.0.0/vectordb_bench/frontend/pages/streaming.py +135 -0
  86. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/tables.py +4 -0
  87. vectordb_bench-1.0.0/vectordb_bench/frontend/vdb_benchmark.py +31 -0
  88. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/interface.py +6 -2
  89. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/metric.py +15 -1
  90. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/models.py +38 -11
  91. vectordb_bench-1.0.0/vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
  92. vectordb_bench-1.0.0/vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
  93. vectordb_bench-1.0.0/vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
  94. vectordb_bench-1.0.0/vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
  95. vectordb_bench-1.0.0/vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
  96. vectordb_bench-1.0.0/vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
  97. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/dbPrices.json +12 -4
  98. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/PKG-INFO +131 -32
  99. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/SOURCES.txt +36 -3
  100. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/requires.txt +5 -1
  101. vectordb_bench-0.0.29/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +0 -245
  102. vectordb_bench-0.0.29/vectordb_bench/backend/clients/aws_opensearch/config.py +0 -78
  103. vectordb_bench-0.0.29/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -160
  104. vectordb_bench-0.0.29/vectordb_bench/backend/clients/lancedb/cli.py +0 -92
  105. vectordb_bench-0.0.29/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -40
  106. vectordb_bench-0.0.29/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -40
  107. vectordb_bench-0.0.29/vectordb_bench/backend/runner/rate_runner.py +0 -107
  108. vectordb_bench-0.0.29/vectordb_bench/backend/runner/read_write_runner.py +0 -202
  109. vectordb_bench-0.0.29/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -22
  110. vectordb_bench-0.0.29/vectordb_bench/frontend/components/check_results/nav.py +0 -22
  111. vectordb_bench-0.0.29/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -37
  112. vectordb_bench-0.0.29/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -24
  113. vectordb_bench-0.0.29/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -115
  114. vectordb_bench-0.0.29/vectordb_bench/frontend/config/styles.py +0 -69
  115. vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
  116. vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
  117. vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
  118. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.devcontainer/Dockerfile +0 -0
  119. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.devcontainer/devcontainer.json +0 -0
  120. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.env.example +0 -0
  121. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.github/workflows/publish_package_on_release.yml +0 -0
  122. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.gitignore +0 -0
  123. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/Dockerfile +0 -0
  124. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/LICENSE +0 -0
  125. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/Makefile +0 -0
  126. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/OWNERS +0 -0
  127. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/fig/custom_case_run_test.png +0 -0
  128. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/fig/custom_dataset.png +0 -0
  129. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/install.py +0 -0
  130. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/setup.cfg +0 -0
  131. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/conftest.py +0 -0
  132. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/pytest.ini +0 -0
  133. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_bench_runner.py +0 -0
  134. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_chroma.py +0 -0
  135. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_data_source.py +0 -0
  136. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_dataset.py +0 -0
  137. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_elasticsearch_cloud.py +0 -0
  138. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_models.py +0 -0
  139. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_rate_runner.py +0 -0
  140. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_redis.py +0 -0
  141. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_utils.py +0 -0
  142. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/ut_cases.py +0 -0
  143. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/__main__.py +0 -0
  144. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/__init__.py +0 -0
  145. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  146. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  147. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
  148. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  149. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  150. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  151. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  152. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  153. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  154. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/cli.py +0 -0
  155. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/clickhouse.py +0 -0
  156. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/config.py +0 -0
  157. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/cli.py +0 -0
  158. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/config.py +0 -0
  159. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/mariadb.py +0 -0
  160. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  161. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  162. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/config.py +0 -0
  163. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/mongodb.py +0 -0
  164. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  165. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  166. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
  167. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  168. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  169. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  170. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  171. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  172. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  173. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  174. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  175. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  176. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  177. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/cli.py +0 -0
  178. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  179. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/config.py +0 -0
  180. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  181. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/cli.py +0 -0
  182. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/config.py +0 -0
  183. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/test.py +0 -0
  184. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/cli.py +0 -0
  185. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/config.py +0 -0
  186. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/tidb.py +0 -0
  187. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/cli.py +0 -0
  188. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/config.py +0 -0
  189. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/util.py +0 -0
  190. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/vespa.py +0 -0
  191. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  192. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  193. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  194. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/data_source.py +0 -0
  195. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/result_collector.py +0 -0
  196. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/util.py +0 -0
  197. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/utils.py +0 -0
  198. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/base.py +0 -0
  199. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/__init__.py +0 -0
  200. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/config-files/sample_config.yml +0 -0
  201. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/custom/custom_case.json +0 -0
  202. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  203. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  204. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  205. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  206. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  207. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  208. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  209. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  210. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  211. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  212. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  213. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  214. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  215. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  216. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/utils.py +0 -0
  217. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/log_util.py +0 -0
  218. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  219. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  220. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  221. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  222. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  223. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  224. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  225. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  226. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  227. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  228. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  229. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  230. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/getLeaderboardData.py +0 -0
  231. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/leaderboard.json +0 -0
  232. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  233. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/entry_points.txt +0 -0
  234. {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -4,6 +4,7 @@ on:
4
4
  pull_request:
5
5
  branches:
6
6
  - main
7
+ - vdbbench_*
7
8
 
8
9
  jobs:
9
10
  build:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectordb-bench
3
- Version: 0.0.29
3
+ Version: 1.0.0
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: polars
23
23
  Requires-Dist: plotly
24
- Requires-Dist: environs<14.1.0
24
+ Requires-Dist: environs
25
25
  Requires-Dist: pydantic<v2
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: pymilvus
@@ -53,6 +53,7 @@ Requires-Dist: PyMySQL; extra == "all"
53
53
  Requires-Dist: clickhouse-connect; extra == "all"
54
54
  Requires-Dist: pyvespa; extra == "all"
55
55
  Requires-Dist: lancedb; extra == "all"
56
+ Requires-Dist: mysql-connector-python; extra == "all"
56
57
  Provides-Extra: qdrant
57
58
  Requires-Dist: qdrant-client; extra == "qdrant"
58
59
  Provides-Extra: pinecone
@@ -90,6 +91,8 @@ Provides-Extra: vespa
90
91
  Requires-Dist: pyvespa; extra == "vespa"
91
92
  Provides-Extra: lancedb
92
93
  Requires-Dist: lancedb; extra == "lancedb"
94
+ Provides-Extra: oceanbase
95
+ Requires-Dist: mysql-connector-python; extra == "oceanbase"
93
96
  Dynamic: license-file
94
97
 
95
98
  # VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
@@ -151,6 +154,7 @@ All the database client supported
151
154
  | mongodb | `pip install vectordb-bench[mongodb]` |
152
155
  | tidb | `pip install vectordb-bench[tidb]` |
153
156
  | vespa | `pip install vectordb-bench[vespa]` |
157
+ | oceanbase | `pip install vectordb-bench[oceanbase]` |
154
158
 
155
159
  ### Run
156
160
 
@@ -295,12 +299,81 @@ Options:
295
299
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
296
300
  --flush-threshold-size TEXT Size threshold for flushing the transaction
297
301
  log
302
+ --engine TEXT type of engine to use valid values [faiss, lucene]
298
303
  # Memory Management
299
304
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
305
+
306
+ # Quantization Type
307
+ --quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
308
+ --help Show this message and exit.
309
+ ```
310
+ ### Run OceanBase from command line
311
+
312
+ Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
313
+
314
+ ```shell
315
+ vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
316
+ --m 16 --ef-construction 200 --case-type Performance1536D50K \
317
+ --index-type HNSW --ef-search 100
318
+ ```
300
319
 
320
+ To list the options for OceanBase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
321
+
322
+ ```text
323
+ $ vectordbbench oceanbasehnsw --help
324
+ Usage: vectordbbench oceanbasehnsw [OPTIONS]
325
+
326
+ Options:
327
+ [...]
328
+ --host TEXT OceanBase host
329
+ --user TEXT OceanBase username [required]
330
+ --password TEXT OceanBase database password
331
+ --database TEXT DataBase name [required]
332
+ --port INTEGER OceanBase port [required]
333
+ --m INTEGER hnsw m [required]
334
+ --ef-construction INTEGER hnsw ef-construction [required]
335
+ --ef-search INTEGER hnsw ef-search [required]
336
+ --index-type [HNSW|HNSW_SQ|HNSW_BQ]
337
+ Type of index to use. Supported values:
338
+ HNSW, HNSW_SQ, HNSW_BQ [required]
301
339
  --help Show this message and exit.
302
340
  ```
303
341
 
342
+ Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
343
+
344
+ ```shell
345
+ vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
346
+ --nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
347
+ --index-type IVF_FLAT --ivf_nprobes 100
348
+ ```
349
+
350
+ To list the options for OceanBase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
351
+
352
+ ```text
353
+ $ vectordbbench oceanbaseivf --help
354
+ Usage: vectordbbench oceanbaseivf [OPTIONS]
355
+
356
+ Options:
357
+ [...]
358
+ --host TEXT OceanBase host
359
+ --user TEXT OceanBase username [required]
360
+ --password TEXT OceanBase database password
361
+ --database TEXT DataBase name [required]
362
+ --port INTEGER OceanBase port [required]
363
+ --index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
364
+ Type of index to use. Supported values:
365
+ IVF_FLAT, IVF_SQ8, IVF_PQ [required]
366
+ --nlist INTEGER Number of cluster centers [required]
367
+ --sample_per_nlist INTEGER The cluster centers are calculated by total
368
+ sampling sample_per_nlist * nlist vectors
369
+ [required]
370
+ --ivf_nprobes TEXT How many clustering centers to search during
371
+ the query [required]
372
+ --m INTEGER The number of sub-vectors that each data
373
+ vector is divided into during IVF-PQ
374
+ --help Show this message and exit.
375
+ ```
376
+
304
377
  #### Using a configuration file.
305
378
 
306
379
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -339,6 +412,49 @@ milvushnsw:
339
412
  > - Options passed on the command line will override the configuration file*
340
413
  > - Parameter names use an _ not -
341
414
 
415
+ #### Using a batch configuration file.
416
+
417
+ The vectordbbench command can read a YAML-formatted batch configuration file and run all the test cases defined in it.
418
+
419
+ By default, configuration files are expected to be in vectordb_bench/config-files/, this can be overridden by setting
420
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
421
+
422
+ The required format is:
423
+ ```yaml
424
+ commandname:
425
+ - parameter_name: parameter_value
426
+ another_parameter_name: parameter_value
427
+ ```
428
+ Example:
429
+ ```yaml
430
+ pgvectorhnsw:
431
+ - db_label: pgConfigTest
432
+ user_name: vectordbbench
433
+ password: vectordbbench
434
+ db_name: vectordbbench
435
+ host: localhost
436
+ m: 16
437
+ ef_construction: 128
438
+ ef_search: 128
439
+ milvushnsw:
440
+ - skip_search_serial: True
441
+ case_type: Performance1536D50K
442
+ uri: http://localhost:19530
443
+ m: 16
444
+ ef_construction: 128
445
+ ef_search: 128
446
+ drop_old: False
447
+ load: False
448
+ ```
449
+ > Notes:
450
+ > - Options can only be passed through configuration files
451
+ > - Parameter names use an _ not -
452
+
453
+ How to use?
454
+ ```shell
455
+ vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
456
+ ```
457
+
342
458
  ## Leaderboard
343
459
  ### Introduction
344
460
  To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
@@ -407,52 +523,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
407
523
 
408
524
  All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
409
525
  ### Run Test Page
410
- ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/f3135a29-8f12-4aac-bbb3-f2f55e2a2ff0)
411
- This is the page to run a test:
412
526
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
413
527
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
414
528
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
415
529
  Currently, only one task can run at a time.
530
+ ![image](fig/run_test_select_db.png)
531
+ ![image](fig/run_test_select_case.png)
532
+ ![image](fig/run_test_submit.png)
533
+
416
534
 
417
535
  ## Module
418
536
  ### Code Structure
419
537
  ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
420
538
  ### Client
421
- Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, and Chroma. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
539
+ Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
422
540
  ### Benchmark Cases
423
- We've developed an array of 15 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
541
+ We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
424
542
  #### Capacity Case
425
543
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
426
544
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
427
545
  #### Search Performance Case
428
546
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
429
- - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
430
- - **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
547
+ - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
548
+ - **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
549
+ - **Small Dataset:** For development (100K-768dim, 50K-1536dim).
431
550
  #### Filtering Search Performance Case
432
- - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (10M-768dim, 5M-1536dim) under a low filtering rate (1% vectors) at different parallel levels.
433
- - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a similar low filtering rate.
434
- - **Large Dataset, High Filtering Rate:** It tests with a large dataset (10M-768dim, 5M-1536dim) but under a high filtering rate (99% vectors).
435
- - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a high filtering rate.
436
- For a quick reference, here is a table summarizing the key aspects of each case:
437
-
438
- Case No. | Case Type | Dataset Size | Filtering Rate | Results |
439
- |----------|-----------|--------------|----------------|---------|
440
- 1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
441
- 2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
442
- 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
443
- 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
444
- 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
445
- 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
446
- 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
447
- 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
448
- 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
449
- 10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
450
- 11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
451
- 12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
452
- 13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
453
- 14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
454
- 15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
455
-
551
+ - **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
552
+ - **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
553
+ #### Streaming Cases
554
+ - **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
456
555
 
457
556
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
458
557
 
@@ -57,6 +57,7 @@ All the database client supported
57
57
  | mongodb | `pip install vectordb-bench[mongodb]` |
58
58
  | tidb | `pip install vectordb-bench[tidb]` |
59
59
  | vespa | `pip install vectordb-bench[vespa]` |
60
+ | oceanbase | `pip install vectordb-bench[oceanbase]` |
60
61
 
61
62
  ### Run
62
63
 
@@ -201,12 +202,81 @@ Options:
201
202
  --force-merge-enabled BOOLEAN Whether to perform force merge operation
202
203
  --flush-threshold-size TEXT Size threshold for flushing the transaction
203
204
  log
205
+ --engine TEXT type of engine to use valid values [faiss, lucene]
204
206
  # Memory Management
205
207
  --cb-threshold TEXT k-NN Memory circuit breaker threshold
208
+
209
+ # Quantization Type
210
+ --quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
211
+ --help Show this message and exit.
212
+ ```
213
+ ### Run OceanBase from command line
214
+
215
+ Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
216
+
217
+ ```shell
218
+ vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
219
+ --m 16 --ef-construction 200 --case-type Performance1536D50K \
220
+ --index-type HNSW --ef-search 100
221
+ ```
206
222
 
223
+ To list the options for OceanBase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
224
+
225
+ ```text
226
+ $ vectordbbench oceanbasehnsw --help
227
+ Usage: vectordbbench oceanbasehnsw [OPTIONS]
228
+
229
+ Options:
230
+ [...]
231
+ --host TEXT OceanBase host
232
+ --user TEXT OceanBase username [required]
233
+ --password TEXT OceanBase database password
234
+ --database TEXT DataBase name [required]
235
+ --port INTEGER OceanBase port [required]
236
+ --m INTEGER hnsw m [required]
237
+ --ef-construction INTEGER hnsw ef-construction [required]
238
+ --ef-search INTEGER hnsw ef-search [required]
239
+ --index-type [HNSW|HNSW_SQ|HNSW_BQ]
240
+ Type of index to use. Supported values:
241
+ HNSW, HNSW_SQ, HNSW_BQ [required]
207
242
  --help Show this message and exit.
208
243
  ```
209
244
 
245
+ Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
246
+
247
+ ```shell
248
+ vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
249
+ --nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
250
+ --index-type IVF_FLAT --ivf_nprobes 100
251
+ ```
252
+
253
+ To list the options for OceanBase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
254
+
255
+ ```text
256
+ $ vectordbbench oceanbaseivf --help
257
+ Usage: vectordbbench oceanbaseivf [OPTIONS]
258
+
259
+ Options:
260
+ [...]
261
+ --host TEXT OceanBase host
262
+ --user TEXT OceanBase username [required]
263
+ --password TEXT OceanBase database password
264
+ --database TEXT DataBase name [required]
265
+ --port INTEGER OceanBase port [required]
266
+ --index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
267
+ Type of index to use. Supported values:
268
+ IVF_FLAT, IVF_SQ8, IVF_PQ [required]
269
+ --nlist INTEGER Number of cluster centers [required]
270
+ --sample_per_nlist INTEGER The cluster centers are calculated by total
271
+ sampling sample_per_nlist * nlist vectors
272
+ [required]
273
+ --ivf_nprobes TEXT How many clustering centers to search during
274
+ the query [required]
275
+ --m INTEGER The number of sub-vectors that each data
276
+ vector is divided into during IVF-PQ
277
+ --help Show this message and exit.
278
+ ```
279
+
210
280
  #### Using a configuration file.
211
281
 
212
282
  The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -245,6 +315,49 @@ milvushnsw:
245
315
  > - Options passed on the command line will override the configuration file*
246
316
  > - Parameter names use an _ not -
247
317
 
318
+ #### Using a batch configuration file.
319
+
320
+ The vectordbbench command can read a batch configuration file to run all the test cases in the yaml formatted configuration file.
321
+
322
+ By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
323
+ the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
324
+
325
+ The required format is:
326
+ ```yaml
327
+ commandname:
328
+ - parameter_name: parameter_value
329
+ another_parameter_name: parameter_value
330
+ ```
331
+ Example:
332
+ ```yaml
333
+ pgvectorhnsw:
334
+ - db_label: pgConfigTest
335
+ user_name: vectordbbench
336
+ password: vectordbbench
337
+ db_name: vectordbbench
338
+ host: localhost
339
+ m: 16
340
+ ef_construction: 128
341
+ ef_search: 128
342
+ milvushnsw:
343
+ - skip_search_serial: True
344
+ case_type: Performance1536D50K
345
+ uri: http://localhost:19530
346
+ m: 16
347
+ ef_construction: 128
348
+ ef_search: 128
349
+ drop_old: False
350
+ load: False
351
+ ```
352
+ > Notes:
353
+ > - Options can only be passed through configuration files
354
+ > - Parameter names use an _ not -
355
+
356
+ How to use?
357
+ ```shell
358
+ vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
359
+ ```
360
+
248
361
  ## Leaderboard
249
362
  ### Introduction
250
363
  To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
@@ -313,52 +426,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
313
426
 
314
427
  All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
315
428
  ### Run Test Page
316
- ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/f3135a29-8f12-4aac-bbb3-f2f55e2a2ff0)
317
- This is the page to run a test:
318
429
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
319
430
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
320
431
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
321
432
  Currently, only one task can run at a time.
433
+ ![image](fig/run_test_select_db.png)
434
+ ![image](fig/run_test_select_case.png)
435
+ ![image](fig/run_test_submit.png)
436
+
322
437
 
323
438
  ## Module
324
439
  ### Code Structure
325
440
  ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
326
441
  ### Client
327
- Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, and Chroma. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
442
+ Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
328
443
  ### Benchmark Cases
329
- We've developed an array of 15 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
444
+ We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
330
445
  #### Capacity Case
331
446
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
332
447
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
333
448
  #### Search Performance Case
334
449
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
335
- - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
336
- - **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
450
+ - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
451
+ - **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
452
+ - **Small Dataset:** For development (100K-768dim, 50K-1536dim).
337
453
  #### Filtering Search Performance Case
338
- - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (10M-768dim, 5M-1536dim) under a low filtering rate (1% vectors) at different parallel levels.
339
- - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a similar low filtering rate.
340
- - **Large Dataset, High Filtering Rate:** It tests with a large dataset (10M-768dim, 5M-1536dim) but under a high filtering rate (99% vectors).
341
- - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a high filtering rate.
342
- For a quick reference, here is a table summarizing the key aspects of each case:
343
-
344
- Case No. | Case Type | Dataset Size | Filtering Rate | Results |
345
- |----------|-----------|--------------|----------------|---------|
346
- 1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
347
- 2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
348
- 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
349
- 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
350
- 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
351
- 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
352
- 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
353
- 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
354
- 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
355
- 10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
356
- 11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
357
- 12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
358
- 13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
359
- 14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
360
- 15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
361
-
454
+ - **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
455
+ - **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
456
+ #### Streaming Cases
457
+ - **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
362
458
 
363
459
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
364
460
 
@@ -24,3 +24,4 @@ scikit-learn
24
24
  pymilvus
25
25
  clickhouse_connect
26
26
  pyvespa
27
+ mysql-connector-python
@@ -35,7 +35,7 @@ dependencies = [
35
35
  "psutil",
36
36
  "polars",
37
37
  "plotly",
38
- "environs<14.1.0",
38
+ "environs",
39
39
  "pydantic<v2",
40
40
  "scikit-learn",
41
41
  "pymilvus", # with pandas, numpy, ujson
@@ -73,6 +73,7 @@ all = [
73
73
  "clickhouse-connect",
74
74
  "pyvespa",
75
75
  "lancedb",
76
+ "mysql-connector-python",
76
77
  ]
77
78
 
78
79
  qdrant = [ "qdrant-client" ]
@@ -96,6 +97,7 @@ tidb = [ "PyMySQL" ]
96
97
  clickhouse = [ "clickhouse-connect" ]
97
98
  vespa = [ "pyvespa" ]
98
99
  lancedb = [ "lancedb" ]
100
+ oceanbase = [ "mysql-connector-python" ]
99
101
 
100
102
  [project.urls]
101
103
  "repository" = "https://github.com/zilliztech/VectorDBBench"
@@ -18,37 +18,16 @@ class config:
18
18
  DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
19
19
  DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
20
20
  NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
21
+ TIME_PER_BATCH = 1 # 1s. for streaming insertion.
22
+ MAX_INSERT_RETRY = 5
23
+ MAX_SEARCH_RETRY = 5
24
+
25
+ LOAD_MAX_TRY_COUNT = 10
21
26
 
22
27
  DROP_OLD = env.bool("DROP_OLD", True)
23
28
  USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
24
29
 
25
- NUM_CONCURRENCY = env.list(
26
- "NUM_CONCURRENCY",
27
- [
28
- 1,
29
- 5,
30
- 10,
31
- 15,
32
- 20,
33
- 25,
34
- 30,
35
- 35,
36
- 40,
37
- 45,
38
- 50,
39
- 55,
40
- 60,
41
- 65,
42
- 70,
43
- 75,
44
- 80,
45
- 85,
46
- 90,
47
- 95,
48
- 100,
49
- ],
50
- subcast=int,
51
- )
30
+ NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 20, 30, 40, 60, 80], subcast=int)
52
31
 
53
32
  CONCURRENCY_DURATION = 30
54
33
 
@@ -68,6 +47,7 @@ class config:
68
47
 
69
48
  CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
70
49
  LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
50
+ LOAD_TIMEOUT_768D_100K = 24 * 3600 # 24h
71
51
  LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
72
52
  LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
73
53
  LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
@@ -75,7 +55,11 @@ class config:
75
55
  LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
76
56
  LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
77
57
 
58
+ LOAD_TIMEOUT_1024D_1M = 24 * 3600 # 24h
59
+ LOAD_TIMEOUT_1024D_10M = 240 * 3600 # 10d
60
+
78
61
  OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
62
+ OPTIMIZE_TIMEOUT_768D_100K = 24 * 3600 # 24h
79
63
  OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
80
64
  OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
81
65
  OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
@@ -83,6 +67,9 @@ class config:
83
67
  OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
84
68
  OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
85
69
 
70
+ OPTIMIZE_TIMEOUT_1024D_1M = 24 * 3600 # 24h
71
+ OPTIMIZE_TIMEOUT_1024D_10M = 240 * 3600 # 10d
72
+
86
73
  def display(self) -> str:
87
74
  return [
88
75
  i
@@ -1,7 +1,8 @@
1
1
  import logging
2
2
 
3
- from vectordb_bench.backend.clients import EmptyDBCaseConfig
3
+ from vectordb_bench.backend.clients import DB, EmptyDBCaseConfig
4
4
  from vectordb_bench.backend.data_source import DatasetSource
5
+ from vectordb_bench.backend.filter import FilterOp
5
6
  from vectordb_bench.models import TaskConfig
6
7
 
7
8
  from .cases import CaseLabel
@@ -10,6 +11,13 @@ from .task_runner import CaseRunner, RunningStatus, TaskRunner
10
11
  log = logging.getLogger(__name__)
11
12
 
12
13
 
14
+ class FilterNotSupportedError(ValueError):
15
+ """Raised when a filter type is not supported by a vector database."""
16
+
17
+ def __init__(self, db_name: str, filter_type: FilterOp):
18
+ super().__init__(f"{filter_type} Filter test is not supported by {db_name}.")
19
+
20
+
13
21
  class Assembler:
14
22
  @classmethod
15
23
  def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
@@ -39,25 +47,30 @@ class Assembler:
39
47
  runners = [cls.assemble(run_id, task, source) for task in tasks]
40
48
  load_runners = [r for r in runners if r.ca.label == CaseLabel.Load]
41
49
  perf_runners = [r for r in runners if r.ca.label == CaseLabel.Performance]
50
+ streaming_runners = [r for r in runners if r.ca.label == CaseLabel.Streaming]
42
51
 
43
52
  # group by db
44
- db2runner = {}
53
+ db2runner: dict[DB, list[CaseRunner]] = {}
45
54
  for r in perf_runners:
46
55
  db = r.config.db
47
56
  if db not in db2runner:
48
57
  db2runner[db] = []
49
58
  db2runner[db].append(r)
50
59
 
51
- # check dbclient installed
52
- for k in db2runner:
53
- _ = k.init_cls
60
+ # check
61
+ for db, runners in db2runner.items():
62
+ db_instance = db.init_cls
63
+ for runner in runners:
64
+ if not db_instance.filter_supported(runner.ca.filters):
65
+ raise FilterNotSupportedError(db.value, runner.ca.filters.type)
54
66
 
55
67
  # sort by dataset size
56
68
  for _, runner in db2runner.items():
57
- runner.sort(key=lambda x: x.ca.dataset.data.size)
69
+ runner.sort(key=lambda x: (x.ca.dataset.data.size, 0 if x.ca.filters.type == FilterOp.StrEqual else 1))
58
70
 
59
71
  all_runners = []
60
72
  all_runners.extend(load_runners)
73
+ all_runners.extend(streaming_runners)
61
74
  for v in db2runner.values():
62
75
  all_runners.extend(v)
63
76