vectordb-bench 0.0.30__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (230) hide show
  1. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.github/workflows/pull_request.yml +1 -0
  2. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/PKG-INFO +85 -32
  3. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/README.md +80 -30
  4. vectordb_bench-1.0.0/fig/homepage/bar-chart.png +0 -0
  5. vectordb_bench-1.0.0/fig/homepage/concurrent.png +0 -0
  6. vectordb_bench-1.0.0/fig/homepage/custom.png +0 -0
  7. vectordb_bench-1.0.0/fig/homepage/label_filter.png +0 -0
  8. vectordb_bench-1.0.0/fig/homepage/qp$.png +0 -0
  9. vectordb_bench-1.0.0/fig/homepage/run_test.png +0 -0
  10. vectordb_bench-1.0.0/fig/homepage/streaming.png +0 -0
  11. vectordb_bench-1.0.0/fig/homepage/table.png +0 -0
  12. vectordb_bench-1.0.0/fig/run_test_select_case.png +0 -0
  13. vectordb_bench-1.0.0/fig/run_test_select_db.png +0 -0
  14. vectordb_bench-1.0.0/fig/run_test_submit.png +0 -0
  15. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/install/requirements_py3.11.txt +1 -0
  16. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/pyproject.toml +3 -1
  17. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/__init__.py +14 -27
  18. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/assembler.py +19 -6
  19. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/cases.py +186 -23
  20. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/__init__.py +16 -0
  21. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/api.py +22 -1
  22. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +82 -41
  23. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/config.py +23 -4
  24. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/chroma.py +6 -2
  25. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
  26. vectordb_bench-1.0.0/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +248 -0
  27. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/config.py +1 -0
  28. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/milvus.py +74 -22
  29. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
  30. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/config.py +125 -0
  31. vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
  32. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
  33. vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_cloud/config.py +96 -0
  34. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
  35. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/dataset.py +143 -27
  36. vectordb_bench-1.0.0/vectordb_bench/backend/filter.py +76 -0
  37. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/__init__.py +3 -3
  38. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/mp_runner.py +52 -39
  39. vectordb_bench-1.0.0/vectordb_bench/backend/runner/rate_runner.py +123 -0
  40. vectordb_bench-1.0.0/vectordb_bench/backend/runner/read_write_runner.py +259 -0
  41. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/serial_runner.py +56 -23
  42. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/task_runner.py +48 -20
  43. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/cli/cli.py +59 -1
  44. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/cli/vectordbbench.py +3 -0
  45. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/data.py +16 -11
  46. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/filters.py +53 -25
  47. vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/headerIcon.py +25 -0
  48. vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/nav.py +42 -0
  49. vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displayCustomCase.py +72 -0
  50. vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displaypPrams.py +29 -0
  51. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
  52. vectordb_bench-1.0.0/vectordb_bench/frontend/components/label_filter/charts.py +60 -0
  53. vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/caseSelector.py +111 -0
  54. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
  55. vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
  56. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
  57. vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/charts.py +253 -0
  58. vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/data.py +62 -0
  59. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/tables/data.py +1 -1
  60. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
  61. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
  62. vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
  63. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbCaseConfigs.py +307 -40
  64. vectordb_bench-1.0.0/vectordb_bench/frontend/config/styles.py +99 -0
  65. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/concurrent.py +5 -1
  66. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/custom.py +4 -0
  67. vectordb_bench-1.0.0/vectordb_bench/frontend/pages/label_filter.py +56 -0
  68. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
  69. vectordb_bench-0.0.30/vectordb_bench/frontend/vdb_benchmark.py → vectordb_bench-1.0.0/vectordb_bench/frontend/pages/results.py +5 -1
  70. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/run_test.py +3 -3
  71. vectordb_bench-1.0.0/vectordb_bench/frontend/pages/streaming.py +135 -0
  72. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/tables.py +4 -0
  73. vectordb_bench-1.0.0/vectordb_bench/frontend/vdb_benchmark.py +31 -0
  74. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/interface.py +6 -2
  75. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/metric.py +15 -1
  76. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/models.py +31 -11
  77. vectordb_bench-1.0.0/vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
  78. vectordb_bench-1.0.0/vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
  79. vectordb_bench-1.0.0/vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
  80. vectordb_bench-1.0.0/vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
  81. vectordb_bench-1.0.0/vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
  82. vectordb_bench-1.0.0/vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
  83. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/dbPrices.json +12 -4
  84. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/PKG-INFO +85 -32
  85. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/SOURCES.txt +31 -3
  86. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/requires.txt +5 -1
  87. vectordb_bench-0.0.30/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -160
  88. vectordb_bench-0.0.30/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -40
  89. vectordb_bench-0.0.30/vectordb_bench/backend/runner/rate_runner.py +0 -107
  90. vectordb_bench-0.0.30/vectordb_bench/backend/runner/read_write_runner.py +0 -202
  91. vectordb_bench-0.0.30/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -22
  92. vectordb_bench-0.0.30/vectordb_bench/frontend/components/check_results/nav.py +0 -22
  93. vectordb_bench-0.0.30/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -37
  94. vectordb_bench-0.0.30/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -24
  95. vectordb_bench-0.0.30/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -115
  96. vectordb_bench-0.0.30/vectordb_bench/frontend/config/styles.py +0 -69
  97. vectordb_bench-0.0.30/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
  98. vectordb_bench-0.0.30/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
  99. vectordb_bench-0.0.30/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
  100. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.devcontainer/Dockerfile +0 -0
  101. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.devcontainer/devcontainer.json +0 -0
  102. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.env.example +0 -0
  103. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.github/workflows/publish_package_on_release.yml +0 -0
  104. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/.gitignore +0 -0
  105. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/Dockerfile +0 -0
  106. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/LICENSE +0 -0
  107. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/Makefile +0 -0
  108. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/OWNERS +0 -0
  109. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/fig/custom_case_run_test.png +0 -0
  110. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/fig/custom_dataset.png +0 -0
  111. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/install.py +0 -0
  112. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/setup.cfg +0 -0
  113. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/conftest.py +0 -0
  114. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/pytest.ini +0 -0
  115. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_bench_runner.py +0 -0
  116. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_chroma.py +0 -0
  117. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_data_source.py +0 -0
  118. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_dataset.py +0 -0
  119. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_elasticsearch_cloud.py +0 -0
  120. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_models.py +0 -0
  121. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_rate_runner.py +0 -0
  122. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_redis.py +0 -0
  123. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/test_utils.py +0 -0
  124. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/tests/ut_cases.py +0 -0
  125. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/__main__.py +0 -0
  126. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/__init__.py +0 -0
  127. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
  128. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
  129. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
  130. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
  131. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
  132. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
  133. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
  134. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/cli.py +0 -0
  135. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
  136. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/config.py +0 -0
  137. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/cli.py +0 -0
  138. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/clickhouse.py +0 -0
  139. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/config.py +0 -0
  140. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/cli.py +0 -0
  141. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/config.py +0 -0
  142. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/lancedb.py +0 -0
  143. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/cli.py +0 -0
  144. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/config.py +0 -0
  145. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/mariadb.py +0 -0
  146. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
  147. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
  148. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/memorydb.py +0 -0
  149. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/cli.py +0 -0
  150. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/config.py +0 -0
  151. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/mongodb.py +0 -0
  152. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
  153. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
  154. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
  155. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
  156. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
  157. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
  158. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
  159. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
  160. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
  161. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
  162. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
  163. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
  164. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
  165. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/cli.py +0 -0
  166. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_local/cli.py +0 -0
  167. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_local/config.py +0 -0
  168. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +0 -0
  169. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/cli.py +0 -0
  170. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/config.py +0 -0
  171. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/redis.py +0 -0
  172. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/cli.py +0 -0
  173. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/config.py +0 -0
  174. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/test.py +0 -0
  175. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/cli.py +0 -0
  176. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/config.py +0 -0
  177. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/tidb.py +0 -0
  178. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/cli.py +0 -0
  179. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/config.py +0 -0
  180. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/util.py +0 -0
  181. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/vespa.py +0 -0
  182. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -0
  183. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
  184. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
  185. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
  186. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
  187. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
  188. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/data_source.py +0 -0
  189. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/result_collector.py +0 -0
  190. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/util.py +0 -0
  191. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/backend/utils.py +0 -0
  192. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/base.py +0 -0
  193. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/cli/__init__.py +0 -0
  194. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/cli/batch_cli.py +0 -0
  195. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/config-files/batch_sample_config.yml +0 -0
  196. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/config-files/sample_config.yml +0 -0
  197. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/custom/custom_case.json +0 -0
  198. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
  199. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
  200. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
  201. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
  202. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
  203. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
  204. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
  205. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
  206. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
  207. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
  208. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
  209. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
  210. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
  211. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbPrices.py +0 -0
  212. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/frontend/utils.py +0 -0
  213. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/log_util.py +0 -0
  214. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
  215. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
  216. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
  217. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
  218. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
  219. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
  220. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
  221. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
  222. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
  223. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
  224. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
  225. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
  226. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/getLeaderboardData.py +0 -0
  227. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench/results/leaderboard.json +0 -0
  228. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/dependency_links.txt +0 -0
  229. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/entry_points.txt +0 -0
  230. {vectordb_bench-0.0.30 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -4,6 +4,7 @@ on:
4
4
  pull_request:
5
5
  branches:
6
6
  - main
7
+ - vdbbench_*
7
8
 
8
9
  jobs:
9
10
  build:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectordb-bench
3
- Version: 0.0.30
3
+ Version: 1.0.0
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
21
21
  Requires-Dist: psutil
22
22
  Requires-Dist: polars
23
23
  Requires-Dist: plotly
24
- Requires-Dist: environs<14.1.0
24
+ Requires-Dist: environs
25
25
  Requires-Dist: pydantic<v2
26
26
  Requires-Dist: scikit-learn
27
27
  Requires-Dist: pymilvus
@@ -53,6 +53,7 @@ Requires-Dist: PyMySQL; extra == "all"
53
53
  Requires-Dist: clickhouse-connect; extra == "all"
54
54
  Requires-Dist: pyvespa; extra == "all"
55
55
  Requires-Dist: lancedb; extra == "all"
56
+ Requires-Dist: mysql-connector-python; extra == "all"
56
57
  Provides-Extra: qdrant
57
58
  Requires-Dist: qdrant-client; extra == "qdrant"
58
59
  Provides-Extra: pinecone
@@ -90,6 +91,8 @@ Provides-Extra: vespa
90
91
  Requires-Dist: pyvespa; extra == "vespa"
91
92
  Provides-Extra: lancedb
92
93
  Requires-Dist: lancedb; extra == "lancedb"
94
+ Provides-Extra: oceanbase
95
+ Requires-Dist: mysql-connector-python; extra == "oceanbase"
93
96
  Dynamic: license-file
94
97
 
95
98
  # VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
@@ -151,6 +154,7 @@ All the database client supported
151
154
  | mongodb | `pip install vectordb-bench[mongodb]` |
152
155
  | tidb | `pip install vectordb-bench[tidb]` |
153
156
  | vespa | `pip install vectordb-bench[vespa]` |
157
+ | oceanbase | `pip install vectordb-bench[oceanbase]` |
154
158
 
155
159
  ### Run
156
160
 
@@ -303,6 +307,72 @@ Options:
303
307
  --quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
304
308
  --help Show this message and exit.
305
309
  ```
310
+ ### Run OceanBase from command line
311
+
312
+ Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
313
+
314
+ ```shell
315
+ vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
316
+ --m 16 --ef-construction 200 --case-type Performance1536D50K \
317
+ --index-type HNSW --ef-search 100
318
+ ```
319
+
320
+ To list the options for OceanBase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
321
+
322
+ ```text
323
+ $ vectordbbench oceanbasehnsw --help
324
+ Usage: vectordbbench oceanbasehnsw [OPTIONS]
325
+
326
+ Options:
327
+ [...]
328
+ --host TEXT OceanBase host
329
+ --user TEXT OceanBase username [required]
330
+ --password TEXT OceanBase database password
331
+ --database TEXT DataBase name [required]
332
+ --port INTEGER OceanBase port [required]
333
+ --m INTEGER hnsw m [required]
334
+ --ef-construction INTEGER hnsw ef-construction [required]
335
+ --ef-search INTEGER hnsw ef-search [required]
336
+ --index-type [HNSW|HNSW_SQ|HNSW_BQ]
337
+ Type of index to use. Supported values:
338
+ HNSW, HNSW_SQ, HNSW_BQ [required]
339
+ --help Show this message and exit.
340
+ ```
341
+
342
+ Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
343
+
344
+ ```shell
345
+ vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
346
+ --nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
347
+ --index-type IVF_FLAT --ivf_nprobes 100
348
+ ```
349
+
350
+ To list the options for OceanBase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
351
+
352
+ ```text
353
+ $ vectordbbench oceanbaseivf --help
354
+ Usage: vectordbbench oceanbaseivf [OPTIONS]
355
+
356
+ Options:
357
+ [...]
358
+ --host TEXT OceanBase host
359
+ --user TEXT OceanBase username [required]
360
+ --password TEXT OceanBase database password
361
+ --database TEXT DataBase name [required]
362
+ --port INTEGER OceanBase port [required]
363
+ --index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
364
+ Type of index to use. Supported values:
365
+ IVF_FLAT, IVF_SQ8, IVF_PQ [required]
366
+ --nlist INTEGER Number of cluster centers [required]
367
+ --sample_per_nlist INTEGER The cluster centers are calculated by total
368
+ sampling sample_per_nlist * nlist vectors
369
+ [required]
370
+ --ivf_nprobes TEXT How many clustering centers to search during
371
+ the query [required]
372
+ --m INTEGER The number of sub-vectors that each data
373
+ vector is divided into during IVF-PQ
374
+ --help Show this message and exit.
375
+ ```
306
376
 
307
377
  #### Using a configuration file.
308
378
 
@@ -453,52 +523,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
453
523
 
454
524
  All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
455
525
  ### Run Test Page
456
- ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/f3135a29-8f12-4aac-bbb3-f2f55e2a2ff0)
457
- This is the page to run a test:
458
526
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
459
527
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
460
528
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
461
529
  Currently, only one task can be run at a time.
530
+ ![image](fig/run_test_select_db.png)
531
+ ![image](fig/run_test_select_case.png)
532
+ ![image](fig/run_test_submit.png)
533
+
462
534
 
463
535
  ## Module
464
536
  ### Code Structure
465
537
  ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
466
538
  ### Client
467
- Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, and Chroma. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
539
+ Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
468
540
  ### Benchmark Cases
469
- We've developed an array of 15 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
541
+ We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
470
542
  #### Capacity Case
471
543
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
472
544
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
473
545
  #### Search Performance Case
474
546
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
475
- - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
476
- - **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
547
+ - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
548
+ - **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
549
+ - **Small Dataset:** For development (100K-768dim, 50K-1536dim).
477
550
  #### Filtering Search Performance Case
478
- - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (10M-768dim, 5M-1536dim) under a low filtering rate (1% vectors) at different parallel levels.
479
- - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a similar low filtering rate.
480
- - **Large Dataset, High Filtering Rate:** It tests with a large dataset (10M-768dim, 5M-1536dim) but under a high filtering rate (99% vectors).
481
- - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a high filtering rate.
482
- For a quick reference, here is a table summarizing the key aspects of each case:
483
-
484
- Case No. | Case Type | Dataset Size | Filtering Rate | Results |
485
- |----------|-----------|--------------|----------------|---------|
486
- 1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
487
- 2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
488
- 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
489
- 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
490
- 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
491
- 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
492
- 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
493
- 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
494
- 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
495
- 10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
496
- 11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
497
- 12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
498
- 13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
499
- 14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
500
- 15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
501
-
551
+ - **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
552
+ - **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
553
+ #### Streaming Cases
554
+ - **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
502
555
 
503
556
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
504
557
 
@@ -57,6 +57,7 @@ All the database client supported
57
57
  | mongodb | `pip install vectordb-bench[mongodb]` |
58
58
  | tidb | `pip install vectordb-bench[tidb]` |
59
59
  | vespa | `pip install vectordb-bench[vespa]` |
60
+ | oceanbase | `pip install vectordb-bench[oceanbase]` |
60
61
 
61
62
  ### Run
62
63
 
@@ -209,6 +210,72 @@ Options:
209
210
  --quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
210
211
  --help Show this message and exit.
211
212
  ```
213
+ ### Run OceanBase from command line
214
+
215
+ Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
216
+
217
+ ```shell
218
+ vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
219
+ --m 16 --ef-construction 200 --case-type Performance1536D50K \
220
+ --index-type HNSW --ef-search 100
221
+ ```
222
+
223
+ To list the options for oceanbase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
224
+
225
+ ```text
226
+ $ vectordbbench oceanbasehnsw --help
227
+ Usage: vectordbbench oceanbasehnsw [OPTIONS]
228
+
229
+ Options:
230
+ [...]
231
+ --host TEXT OceanBase host
232
+ --user TEXT OceanBase username [required]
233
+ --password TEXT OceanBase database password
234
+ --database TEXT DataBase name [required]
235
+ --port INTEGER OceanBase port [required]
236
+ --m INTEGER hnsw m [required]
237
+ --ef-construction INTEGER hnsw ef-construction [required]
238
+ --ef-search INTEGER hnsw ef-search [required]
239
+ --index-type [HNSW|HNSW_SQ|HNSW_BQ]
240
+ Type of index to use. Supported values:
241
+ HNSW, HNSW_SQ, HNSW_BQ [required]
242
+ --help Show this message and exit.
243
+ ```
244
+
245
+ Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
246
+
247
+ ```shell
248
+ vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
249
+ --nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
250
+ --index-type IVF_FLAT --ivf_nprobes 100
251
+ ```
252
+
253
+ To list the options for oceanbase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
254
+
255
+ ```text
256
+ $ vectordbbench oceanbaseivf --help
257
+ Usage: vectordbbench oceanbaseivf [OPTIONS]
258
+
259
+ Options:
260
+ [...]
261
+ --host TEXT OceanBase host
262
+ --user TEXT OceanBase username [required]
263
+ --password TEXT OceanBase database password
264
+ --database TEXT DataBase name [required]
265
+ --port INTEGER OceanBase port [required]
266
+ --index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
267
+ Type of index to use. Supported values:
268
+ IVF_FLAT, IVF_SQ8, IVF_PQ [required]
269
+ --nlist INTEGER Number of cluster centers [required]
270
+ --sample_per_nlist INTEGER The cluster centers are calculated by total
271
+ sampling sample_per_nlist * nlist vectors
272
+ [required]
273
+ --ivf_nprobes TEXT How many clustering centers to search during
274
+ the query [required]
275
+ --m INTEGER The number of sub-vectors that each data
276
+ vector is divided into during IVF-PQ
277
+ --help Show this message and exit.
278
+ ```
212
279
 
213
280
  #### Using a configuration file.
214
281
 
@@ -359,52 +426,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
359
426
 
360
427
  All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
361
428
  ### Run Test Page
362
- ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/f3135a29-8f12-4aac-bbb3-f2f55e2a2ff0)
363
- This is the page to run a test:
364
429
  1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
365
430
  2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
366
431
  3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
367
432
  Currently, only one task can be run at a time.
433
+ ![image](fig/run_test_select_db.png)
434
+ ![image](fig/run_test_select_case.png)
435
+ ![image](fig/run_test_submit.png)
436
+
368
437
 
369
438
  ## Module
370
439
  ### Code Structure
371
440
  ![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
372
441
  ### Client
373
- Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, and Chroma. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
442
+ Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
374
443
  ### Benchmark Cases
375
- We've developed an array of 15 comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into three main types:
444
+ We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
376
445
  #### Capacity Case
377
446
  - **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
378
447
  - **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
379
448
  #### Search Performance Case
380
449
  - **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
381
- - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
382
- - **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
450
+ - **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
451
+ - **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
452
+ - **Small Dataset:** For development (100K-768dim, 50K-1536dim).
383
453
  #### Filtering Search Performance Case
384
- - **Large Dataset, Low Filtering Rate:** Evaluates search performance with a large dataset (10M-768dim, 5M-1536dim) under a low filtering rate (1% vectors) at different parallel levels.
385
- - **Medium Dataset, Low Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a similar low filtering rate.
386
- - **Large Dataset, High Filtering Rate:** It tests with a large dataset (10M-768dim, 5M-1536dim) but under a high filtering rate (99% vectors).
387
- - **Medium Dataset, High Filtering Rate:** This case uses a medium dataset (1M-768dim, 500K-1536dim) with a high filtering rate.
388
- For a quick reference, here is a table summarizing the key aspects of each case:
389
-
390
- Case No. | Case Type | Dataset Size | Filtering Rate | Results |
391
- |----------|-----------|--------------|----------------|---------|
392
- 1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
393
- 2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
394
- 3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
395
- 4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
396
- 5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
397
- 6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
398
- 7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
399
- 8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
400
- 9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
401
- 10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
402
- 11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
403
- 12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
404
- 13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
405
- 14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
406
- 15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
407
-
454
+ - **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
455
+ - **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
456
+ #### Streaming Cases
457
+ - **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
408
458
 
409
459
  Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
410
460
 
@@ -24,3 +24,4 @@ scikit-learn
24
24
  pymilvus
25
25
  clickhouse_connect
26
26
  pyvespa
27
+ mysql-connector-python
@@ -35,7 +35,7 @@ dependencies = [
35
35
  "psutil",
36
36
  "polars",
37
37
  "plotly",
38
- "environs<14.1.0",
38
+ "environs",
39
39
  "pydantic<v2",
40
40
  "scikit-learn",
41
41
  "pymilvus", # with pandas, numpy, ujson
@@ -73,6 +73,7 @@ all = [
73
73
  "clickhouse-connect",
74
74
  "pyvespa",
75
75
  "lancedb",
76
+ "mysql-connector-python",
76
77
  ]
77
78
 
78
79
  qdrant = [ "qdrant-client" ]
@@ -96,6 +97,7 @@ tidb = [ "PyMySQL" ]
96
97
  clickhouse = [ "clickhouse-connect" ]
97
98
  vespa = [ "pyvespa" ]
98
99
  lancedb = [ "lancedb" ]
100
+ oceanbase = [ "mysql-connector-python" ]
99
101
 
100
102
  [project.urls]
101
103
  "repository" = "https://github.com/zilliztech/VectorDBBench"
@@ -18,37 +18,16 @@ class config:
18
18
  DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
19
19
  DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
20
20
  NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
21
+ TIME_PER_BATCH = 1 # 1s. for streaming insertion.
22
+ MAX_INSERT_RETRY = 5
23
+ MAX_SEARCH_RETRY = 5
24
+
25
+ LOAD_MAX_TRY_COUNT = 10
21
26
 
22
27
  DROP_OLD = env.bool("DROP_OLD", True)
23
28
  USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
24
29
 
25
- NUM_CONCURRENCY = env.list(
26
- "NUM_CONCURRENCY",
27
- [
28
- 1,
29
- 5,
30
- 10,
31
- 15,
32
- 20,
33
- 25,
34
- 30,
35
- 35,
36
- 40,
37
- 45,
38
- 50,
39
- 55,
40
- 60,
41
- 65,
42
- 70,
43
- 75,
44
- 80,
45
- 85,
46
- 90,
47
- 95,
48
- 100,
49
- ],
50
- subcast=int,
51
- )
30
+ NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 20, 30, 40, 60, 80], subcast=int)
52
31
 
53
32
  CONCURRENCY_DURATION = 30
54
33
 
@@ -68,6 +47,7 @@ class config:
68
47
 
69
48
  CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
70
49
  LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
50
+ LOAD_TIMEOUT_768D_100K = 24 * 3600 # 24h
71
51
  LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
72
52
  LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
73
53
  LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
@@ -75,7 +55,11 @@ class config:
75
55
  LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
76
56
  LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
77
57
 
58
+ LOAD_TIMEOUT_1024D_1M = 24 * 3600 # 24h
59
+ LOAD_TIMEOUT_1024D_10M = 240 * 3600 # 10d
60
+
78
61
  OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
62
+ OPTIMIZE_TIMEOUT_768D_100K = 24 * 3600 # 24h
79
63
  OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
80
64
  OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
81
65
  OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
@@ -83,6 +67,9 @@ class config:
83
67
  OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
84
68
  OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
85
69
 
70
+ OPTIMIZE_TIMEOUT_1024D_1M = 24 * 3600 # 24h
71
+ OPTIMIZE_TIMEOUT_1024D_10M = 240 * 3600 # 10d
72
+
86
73
  def display(self) -> str:
87
74
  return [
88
75
  i
@@ -1,7 +1,8 @@
1
1
  import logging
2
2
 
3
- from vectordb_bench.backend.clients import EmptyDBCaseConfig
3
+ from vectordb_bench.backend.clients import DB, EmptyDBCaseConfig
4
4
  from vectordb_bench.backend.data_source import DatasetSource
5
+ from vectordb_bench.backend.filter import FilterOp
5
6
  from vectordb_bench.models import TaskConfig
6
7
 
7
8
  from .cases import CaseLabel
@@ -10,6 +11,13 @@ from .task_runner import CaseRunner, RunningStatus, TaskRunner
10
11
  log = logging.getLogger(__name__)
11
12
 
12
13
 
14
+ class FilterNotSupportedError(ValueError):
15
+ """Raised when a filter type is not supported by a vector database."""
16
+
17
+ def __init__(self, db_name: str, filter_type: FilterOp):
18
+ super().__init__(f"{filter_type} Filter test is not supported by {db_name}.")
19
+
20
+
13
21
  class Assembler:
14
22
  @classmethod
15
23
  def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
@@ -39,25 +47,30 @@ class Assembler:
39
47
  runners = [cls.assemble(run_id, task, source) for task in tasks]
40
48
  load_runners = [r for r in runners if r.ca.label == CaseLabel.Load]
41
49
  perf_runners = [r for r in runners if r.ca.label == CaseLabel.Performance]
50
+ streaming_runners = [r for r in runners if r.ca.label == CaseLabel.Streaming]
42
51
 
43
52
  # group by db
44
- db2runner = {}
53
+ db2runner: dict[DB, list[CaseRunner]] = {}
45
54
  for r in perf_runners:
46
55
  db = r.config.db
47
56
  if db not in db2runner:
48
57
  db2runner[db] = []
49
58
  db2runner[db].append(r)
50
59
 
51
- # check dbclient installed
52
- for k in db2runner:
53
- _ = k.init_cls
60
+ # check that each db client is installed and supports the filters of its cases
61
+ for db, runners in db2runner.items():
62
+ db_instance = db.init_cls
63
+ for runner in runners:
64
+ if not db_instance.filter_supported(runner.ca.filters):
65
+ raise FilterNotSupportedError(db.value, runner.ca.filters.type)
54
66
 
55
67
  # sort by dataset size
56
68
  for _, runner in db2runner.items():
57
- runner.sort(key=lambda x: x.ca.dataset.data.size)
69
+ runner.sort(key=lambda x: (x.ca.dataset.data.size, 0 if x.ca.filters.type == FilterOp.StrEqual else 1))
58
70
 
59
71
  all_runners = []
60
72
  all_runners.extend(load_runners)
73
+ all_runners.extend(streaming_runners)
61
74
  for v in db2runner.values():
62
75
  all_runners.extend(v)
63
76