vectordb-bench 0.0.29__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.github/workflows/pull_request.yml +1 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/PKG-INFO +131 -32
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/README.md +126 -30
- vectordb_bench-1.0.0/fig/homepage/bar-chart.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/concurrent.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/custom.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/label_filter.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/qp$.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/run_test.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/streaming.png +0 -0
- vectordb_bench-1.0.0/fig/homepage/table.png +0 -0
- vectordb_bench-1.0.0/fig/run_test_select_case.png +0 -0
- vectordb_bench-1.0.0/fig/run_test_select_db.png +0 -0
- vectordb_bench-1.0.0/fig/run_test_submit.png +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/install/requirements_py3.11.txt +1 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/pyproject.toml +3 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/__init__.py +14 -27
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/assembler.py +19 -6
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/cases.py +186 -23
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/__init__.py +32 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/api.py +22 -1
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +451 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/aws_opensearch/config.py +120 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/chroma.py +6 -2
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +248 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/lancedb/cli.py +146 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/config.py +14 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/cli.py +30 -9
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/config.py +3 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/milvus/milvus.py +81 -23
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/config.py +125 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_cloud/config.py +96 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/clients/weaviate_cloud/cli.py +66 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/dataset.py +143 -27
- vectordb_bench-1.0.0/vectordb_bench/backend/filter.py +76 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/__init__.py +3 -3
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/mp_runner.py +52 -39
- vectordb_bench-1.0.0/vectordb_bench/backend/runner/rate_runner.py +123 -0
- vectordb_bench-1.0.0/vectordb_bench/backend/runner/read_write_runner.py +259 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/serial_runner.py +56 -23
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/task_runner.py +48 -20
- vectordb_bench-1.0.0/vectordb_bench/cli/batch_cli.py +121 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/cli.py +59 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/vectordbbench.py +7 -0
- vectordb_bench-1.0.0/vectordb_bench/config-files/batch_sample_config.yml +17 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/data.py +16 -11
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/filters.py +53 -25
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/headerIcon.py +25 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/check_results/nav.py +42 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displayCustomCase.py +72 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/custom/displaypPrams.py +29 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/label_filter/charts.py +60 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/caseSelector.py +111 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/charts.py +253 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/streaming/data.py +62 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/tables/data.py +1 -1
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
- vectordb_bench-1.0.0/vectordb_bench/frontend/config/styles.py +99 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/concurrent.py +5 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/custom.py +4 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/pages/label_filter.py +56 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
- vectordb_bench-0.0.29/vectordb_bench/frontend/vdb_benchmark.py → vectordb_bench-1.0.0/vectordb_bench/frontend/pages/results.py +5 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/run_test.py +3 -3
- vectordb_bench-1.0.0/vectordb_bench/frontend/pages/streaming.py +135 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/pages/tables.py +4 -0
- vectordb_bench-1.0.0/vectordb_bench/frontend/vdb_benchmark.py +31 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/interface.py +6 -2
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/metric.py +15 -1
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/models.py +38 -11
- vectordb_bench-1.0.0/vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
- vectordb_bench-1.0.0/vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
- vectordb_bench-1.0.0/vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
- vectordb_bench-1.0.0/vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
- vectordb_bench-1.0.0/vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
- vectordb_bench-1.0.0/vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/dbPrices.json +12 -4
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/PKG-INFO +131 -32
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/SOURCES.txt +36 -3
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/requires.txt +5 -1
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +0 -245
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/aws_opensearch/config.py +0 -78
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -160
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/lancedb/cli.py +0 -92
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -40
- vectordb_bench-0.0.29/vectordb_bench/backend/clients/weaviate_cloud/cli.py +0 -40
- vectordb_bench-0.0.29/vectordb_bench/backend/runner/rate_runner.py +0 -107
- vectordb_bench-0.0.29/vectordb_bench/backend/runner/read_write_runner.py +0 -202
- vectordb_bench-0.0.29/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -22
- vectordb_bench-0.0.29/vectordb_bench/frontend/components/check_results/nav.py +0 -22
- vectordb_bench-0.0.29/vectordb_bench/frontend/components/custom/displayCustomCase.py +0 -37
- vectordb_bench-0.0.29/vectordb_bench/frontend/components/custom/displaypPrams.py +0 -24
- vectordb_bench-0.0.29/vectordb_bench/frontend/components/run_test/caseSelector.py +0 -115
- vectordb_bench-0.0.29/vectordb_bench/frontend/config/styles.py +0 -69
- vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
- vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
- vectordb_bench-0.0.29/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.devcontainer/Dockerfile +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.devcontainer/devcontainer.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.env.example +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/.gitignore +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/Dockerfile +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/LICENSE +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/Makefile +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/OWNERS +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/fig/custom_case_run_test.png +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/fig/custom_dataset.png +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/install.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/setup.cfg +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/conftest.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/pytest.ini +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_bench_runner.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_chroma.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_data_source.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_dataset.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_models.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_rate_runner.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_redis.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/test_utils.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/tests/ut_cases.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/__main__.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/alloydb.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/alloydb/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/chroma/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/clickhouse.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/clickhouse/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mariadb/mariadb.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/memorydb/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/mongodb/mongodb.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvector/pgvector.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/qdrant_cloud/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/redis/redis.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/test/test.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/tidb/tidb.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/util.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/vespa/vespa.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/data_source.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/result_collector.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/runner/util.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/backend/utils.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/base.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/cli/__init__.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/config-files/sample_config.yml +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/custom/custom_case.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/charts.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/priceTable.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/check_results/stPageConfig.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/concurrent/charts.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/custom/initStyle.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/generateTasks.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/config/dbPrices.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/frontend/utils.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/log_util.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/getLeaderboardData.py +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench/results/leaderboard.json +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb_bench-0.0.29 → vectordb_bench-1.0.0}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.29
|
3
|
+
Version: 1.0.0
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -21,7 +21,7 @@ Requires-Dist: oss2
|
|
21
21
|
Requires-Dist: psutil
|
22
22
|
Requires-Dist: polars
|
23
23
|
Requires-Dist: plotly
|
24
|
-
Requires-Dist: environs
|
24
|
+
Requires-Dist: environs
|
25
25
|
Requires-Dist: pydantic<v2
|
26
26
|
Requires-Dist: scikit-learn
|
27
27
|
Requires-Dist: pymilvus
|
@@ -53,6 +53,7 @@ Requires-Dist: PyMySQL; extra == "all"
|
|
53
53
|
Requires-Dist: clickhouse-connect; extra == "all"
|
54
54
|
Requires-Dist: pyvespa; extra == "all"
|
55
55
|
Requires-Dist: lancedb; extra == "all"
|
56
|
+
Requires-Dist: mysql-connector-python; extra == "all"
|
56
57
|
Provides-Extra: qdrant
|
57
58
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
58
59
|
Provides-Extra: pinecone
|
@@ -90,6 +91,8 @@ Provides-Extra: vespa
|
|
90
91
|
Requires-Dist: pyvespa; extra == "vespa"
|
91
92
|
Provides-Extra: lancedb
|
92
93
|
Requires-Dist: lancedb; extra == "lancedb"
|
94
|
+
Provides-Extra: oceanbase
|
95
|
+
Requires-Dist: mysql-connector-python; extra == "oceanbase"
|
93
96
|
Dynamic: license-file
|
94
97
|
|
95
98
|
# VectorDBBench(VDBBench): A Benchmark Tool for VectorDB
|
@@ -151,6 +154,7 @@ All the database client supported
|
|
151
154
|
| mongodb | `pip install vectordb-bench[mongodb]` |
|
152
155
|
| tidb | `pip install vectordb-bench[tidb]` |
|
153
156
|
| vespa | `pip install vectordb-bench[vespa]` |
|
157
|
+
| oceanbase | `pip install vectordb-bench[oceanbase]` |
|
154
158
|
|
155
159
|
### Run
|
156
160
|
|
@@ -295,12 +299,81 @@ Options:
|
|
295
299
|
--force-merge-enabled BOOLEAN Whether to perform force merge operation
|
296
300
|
--flush-threshold-size TEXT Size threshold for flushing the transaction
|
297
301
|
log
|
302
|
+
--engine TEXT type of engine to use valid values [faiss, lucene]
|
298
303
|
# Memory Management
|
299
304
|
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
305
|
+
|
306
|
+
# Quantization Type
|
307
|
+
--quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
|
308
|
+
--help Show this message and exit.
|
309
|
+
```
|
310
|
+
### Run OceanBase from command line
|
311
|
+
|
312
|
+
Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
|
313
|
+
|
314
|
+
```shell
|
315
|
+
vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
|
316
|
+
--m 16 --ef-construction 200 --case-type Performance1536D50K \
|
317
|
+
--index-type HNSW --ef-search 100
|
318
|
+
```
|
300
319
|
|
320
|
+
To list the options for OceanBase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
|
321
|
+
|
322
|
+
```text
|
323
|
+
$ vectordbbench oceanbasehnsw --help
|
324
|
+
Usage: vectordbbench oceanbasehnsw [OPTIONS]
|
325
|
+
|
326
|
+
Options:
|
327
|
+
[...]
|
328
|
+
--host TEXT OceanBase host
|
329
|
+
--user TEXT OceanBase username [required]
|
330
|
+
--password TEXT OceanBase database password
|
331
|
+
--database TEXT DataBase name [required]
|
332
|
+
--port INTEGER OceanBase port [required]
|
333
|
+
--m INTEGER hnsw m [required]
|
334
|
+
--ef-construction INTEGER hnsw ef-construction [required]
|
335
|
+
--ef-search INTEGER hnsw ef-search [required]
|
336
|
+
--index-type [HNSW|HNSW_SQ|HNSW_BQ]
|
337
|
+
Type of index to use. Supported values:
|
338
|
+
HNSW, HNSW_SQ, HNSW_BQ [required]
|
301
339
|
--help Show this message and exit.
|
302
340
|
```
|
303
341
|
|
342
|
+
Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
|
343
|
+
|
344
|
+
```shell
|
345
|
+
vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
|
346
|
+
--nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
|
347
|
+
--index-type IVF_FLAT --ivf_nprobes 100
|
348
|
+
```
|
349
|
+
|
350
|
+
To list the options for OceanBase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
|
351
|
+
|
352
|
+
```text
|
353
|
+
$ vectordbbench oceanbaseivf --help
|
354
|
+
Usage: vectordbbench oceanbaseivf [OPTIONS]
|
355
|
+
|
356
|
+
Options:
|
357
|
+
[...]
|
358
|
+
--host TEXT OceanBase host
|
359
|
+
--user TEXT OceanBase username [required]
|
360
|
+
--password TEXT OceanBase database password
|
361
|
+
--database TEXT DataBase name [required]
|
362
|
+
--port INTEGER OceanBase port [required]
|
363
|
+
--index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
|
364
|
+
Type of index to use. Supported values:
|
365
|
+
IVF_FLAT, IVF_SQ8, IVF_PQ [required]
|
366
|
+
--nlist INTEGER Number of cluster centers [required]
|
367
|
+
--sample_per_nlist INTEGER The cluster centers are calculated by total
|
368
|
+
sampling sample_per_nlist * nlist vectors
|
369
|
+
[required]
|
370
|
+
--ivf_nprobes TEXT How many clustering centers to search during
|
371
|
+
the query [required]
|
372
|
+
--m INTEGER The number of sub-vectors that each data
|
373
|
+
vector is divided into during IVF-PQ
|
374
|
+
--help Show this message and exit.
|
375
|
+
```
|
376
|
+
|
304
377
|
#### Using a configuration file.
|
305
378
|
|
306
379
|
The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
|
@@ -339,6 +412,49 @@ milvushnsw:
|
|
339
412
|
> - Options passed on the command line will override the configuration file*
|
340
413
|
> - Parameter names use an _ not -
|
341
414
|
|
415
|
+
#### Using a batch configuration file.
|
416
|
+
|
417
|
+
The vectordbbench command can read a batch configuration file to run all the test cases in the yaml formatted configuration file.
|
418
|
+
|
419
|
+
By default, configuration files are expected to be in vectordb_bench/config-files/, this can be overridden by setting
|
420
|
+
the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
|
421
|
+
|
422
|
+
The required format is:
|
423
|
+
```yaml
|
424
|
+
commandname:
|
425
|
+
- parameter_name: parameter_value
|
426
|
+
another_parameter_name: parameter_value
|
427
|
+
```
|
428
|
+
Example:
|
429
|
+
```yaml
|
430
|
+
pgvectorhnsw:
|
431
|
+
- db_label: pgConfigTest
|
432
|
+
user_name: vectordbbench
|
433
|
+
password: vectordbbench
|
434
|
+
db_name: vectordbbench
|
435
|
+
host: localhost
|
436
|
+
m: 16
|
437
|
+
ef_construction: 128
|
438
|
+
ef_search: 128
|
439
|
+
milvushnsw:
|
440
|
+
- skip_search_serial: True
|
441
|
+
case_type: Performance1536D50K
|
442
|
+
uri: http://localhost:19530
|
443
|
+
m: 16
|
444
|
+
ef_construction: 128
|
445
|
+
ef_search: 128
|
446
|
+
drop_old: False
|
447
|
+
load: False
|
448
|
+
```
|
449
|
+
> Notes:
|
450
|
+
> - Options can only be passed through configuration files
|
451
|
+
> - Parameter names use an _ not -
|
452
|
+
|
453
|
+
How to use?
|
454
|
+
```shell
|
455
|
+
vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
|
456
|
+
```
|
457
|
+
|
342
458
|
## Leaderboard
|
343
459
|
### Introduction
|
344
460
|
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
@@ -407,52 +523,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
|
|
407
523
|
|
408
524
|
All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
|
409
525
|
### Run Test Page
|
410
|
-

|
411
|
-
This is the page to run a test:
|
412
526
|
1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
|
413
527
|
2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
|
414
528
|
3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
|
415
529
|
Currently, only one task can be run at a time.
|
530
|
+

|
531
|
+

|
532
|
+

|
533
|
+
|
416
534
|
|
417
535
|
## Module
|
418
536
|
### Code Structure
|
419
537
|

|
420
538
|
### Client
|
421
|
-
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis,
|
539
|
+
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
|
422
540
|
### Benchmark Cases
|
423
|
-
We've developed
|
541
|
+
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
|
424
542
|
#### Capacity Case
|
425
543
|
- **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
|
426
544
|
- **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
|
427
545
|
#### Search Performance Case
|
428
546
|
- **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
|
429
|
-
- **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
|
430
|
-
- **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
|
547
|
+
- **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
|
548
|
+
- **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
|
549
|
+
- **Small Dataset:** For development (100K-768dim, 50K-1536dim).
|
431
550
|
#### Filtering Search Performance Case
|
432
|
-
- **
|
433
|
-
- **
|
434
|
-
|
435
|
-
- **
|
436
|
-
For a quick reference, here is a table summarizing the key aspects of each case:
|
437
|
-
|
438
|
-
Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
439
|
-
|----------|-----------|--------------|----------------|---------|
|
440
|
-
1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
|
441
|
-
2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
|
442
|
-
3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
443
|
-
4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
444
|
-
5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
445
|
-
6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
446
|
-
7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
447
|
-
8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
448
|
-
9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
449
|
-
10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
450
|
-
11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
451
|
-
12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
452
|
-
13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
453
|
-
14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
454
|
-
15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
455
|
-
|
551
|
+
- **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
|
552
|
+
- **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
|
553
|
+
#### Streaming Cases
|
554
|
+
- **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
|
456
555
|
|
457
556
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
458
557
|
|
@@ -57,6 +57,7 @@ All the database client supported
|
|
57
57
|
| mongodb | `pip install vectordb-bench[mongodb]` |
|
58
58
|
| tidb | `pip install vectordb-bench[tidb]` |
|
59
59
|
| vespa | `pip install vectordb-bench[vespa]` |
|
60
|
+
| oceanbase | `pip install vectordb-bench[oceanbase]` |
|
60
61
|
|
61
62
|
### Run
|
62
63
|
|
@@ -201,12 +202,81 @@ Options:
|
|
201
202
|
--force-merge-enabled BOOLEAN Whether to perform force merge operation
|
202
203
|
--flush-threshold-size TEXT Size threshold for flushing the transaction
|
203
204
|
log
|
205
|
+
--engine TEXT type of engine to use valid values [faiss, lucene]
|
204
206
|
# Memory Management
|
205
207
|
--cb-threshold TEXT k-NN Memory circuit breaker threshold
|
208
|
+
|
209
|
+
# Quantization Type
|
210
|
+
--quantization-type TEXT which type of quantization to use valid values [fp32, fp16]
|
211
|
+
--help Show this message and exit.
|
212
|
+
```
|
213
|
+
### Run OceanBase from command line
|
214
|
+
|
215
|
+
Execute tests for the index types: HNSW, HNSW_SQ, or HNSW_BQ.
|
216
|
+
|
217
|
+
```shell
|
218
|
+
vectordbbench oceanbasehnsw --host xxx --port xxx --user root@mysql_tenant --database test \
|
219
|
+
--m 16 --ef-construction 200 --case-type Performance1536D50K \
|
220
|
+
--index-type HNSW --ef-search 100
|
221
|
+
```
|
206
222
|
|
223
|
+
To list the options for oceanbase, execute `vectordbbench oceanbasehnsw --help`. The following are some OceanBase-specific command-line options.
|
224
|
+
|
225
|
+
```text
|
226
|
+
$ vectordbbench oceanbasehnsw --help
|
227
|
+
Usage: vectordbbench oceanbasehnsw [OPTIONS]
|
228
|
+
|
229
|
+
Options:
|
230
|
+
[...]
|
231
|
+
--host TEXT OceanBase host
|
232
|
+
--user TEXT OceanBase username [required]
|
233
|
+
--password TEXT OceanBase database password
|
234
|
+
--database TEXT DataBase name [required]
|
235
|
+
--port INTEGER OceanBase port [required]
|
236
|
+
--m INTEGER hnsw m [required]
|
237
|
+
--ef-construction INTEGER hnsw ef-construction [required]
|
238
|
+
--ef-search INTEGER hnsw ef-search [required]
|
239
|
+
--index-type [HNSW|HNSW_SQ|HNSW_BQ]
|
240
|
+
Type of index to use. Supported values:
|
241
|
+
HNSW, HNSW_SQ, HNSW_BQ [required]
|
207
242
|
--help Show this message and exit.
|
208
243
|
```
|
209
244
|
|
245
|
+
Execute tests for the index types: IVF_FLAT, IVF_SQ8, or IVF_PQ.
|
246
|
+
|
247
|
+
```shell
|
248
|
+
vectordbbench oceanbaseivf --host xxx --port xxx --user root@mysql_tenant --database test \
|
249
|
+
--nlist 1000 --sample_per_nlist 256 --case-type Performance768D1M \
|
250
|
+
--index-type IVF_FLAT --ivf_nprobes 100
|
251
|
+
```
|
252
|
+
|
253
|
+
To list the options for oceanbase, execute `vectordbbench oceanbaseivf --help`. The following are some OceanBase-specific command-line options.
|
254
|
+
|
255
|
+
```text
|
256
|
+
$ vectordbbench oceanbaseivf --help
|
257
|
+
Usage: vectordbbench oceanbaseivf [OPTIONS]
|
258
|
+
|
259
|
+
Options:
|
260
|
+
[...]
|
261
|
+
--host TEXT OceanBase host
|
262
|
+
--user TEXT OceanBase username [required]
|
263
|
+
--password TEXT OceanBase database password
|
264
|
+
--database TEXT DataBase name [required]
|
265
|
+
--port INTEGER OceanBase port [required]
|
266
|
+
--index-type [IVF_FLAT|IVF_SQ8|IVF_PQ]
|
267
|
+
Type of index to use. Supported values:
|
268
|
+
IVF_FLAT, IVF_SQ8, IVF_PQ [required]
|
269
|
+
--nlist INTEGER Number of cluster centers [required]
|
270
|
+
--sample_per_nlist INTEGER The cluster centers are calculated by total
|
271
|
+
sampling sample_per_nlist * nlist vectors
|
272
|
+
[required]
|
273
|
+
--ivf_nprobes TEXT How many clustering centers to search during
|
274
|
+
the query [required]
|
275
|
+
--m INTEGER The number of sub-vectors that each data
|
276
|
+
vector is divided into during IVF-PQ
|
277
|
+
--help Show this message and exit.
|
278
|
+
```
|
279
|
+
|
210
280
|
#### Using a configuration file.
|
211
281
|
|
212
282
|
The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
|
@@ -245,6 +315,49 @@ milvushnsw:
|
|
245
315
|
> - Options passed on the command line will override the configuration file*
|
246
316
|
> - Parameter names use an _ not -
|
247
317
|
|
318
|
+
#### Using a batch configuration file.
|
319
|
+
|
320
|
+
The vectordbbench command can read a batch configuration file to run all the test cases in the yaml formatted configuration file.
|
321
|
+
|
322
|
+
By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
|
323
|
+
the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
|
324
|
+
|
325
|
+
The required format is:
|
326
|
+
```yaml
|
327
|
+
commandname:
|
328
|
+
- parameter_name: parameter_value
|
329
|
+
another_parameter_name: parameter_value
|
330
|
+
```
|
331
|
+
Example:
|
332
|
+
```yaml
|
333
|
+
pgvectorhnsw:
|
334
|
+
- db_label: pgConfigTest
|
335
|
+
user_name: vectordbbench
|
336
|
+
password: vectordbbench
|
337
|
+
db_name: vectordbbench
|
338
|
+
host: localhost
|
339
|
+
m: 16
|
340
|
+
ef_construction: 128
|
341
|
+
ef_search: 128
|
342
|
+
milvushnsw:
|
343
|
+
- skip_search_serial: True
|
344
|
+
case_type: Performance1536D50K
|
345
|
+
uri: http://localhost:19530
|
346
|
+
m: 16
|
347
|
+
ef_construction: 128
|
348
|
+
ef_search: 128
|
349
|
+
drop_old: False
|
350
|
+
load: False
|
351
|
+
```
|
352
|
+
> Notes:
|
353
|
+
> - Options can only be passed through configuration files
|
354
|
+
> - Parameter names use an _ not -
|
355
|
+
|
356
|
+
How to use?
|
357
|
+
```shell
|
358
|
+
vectordbbench batchcli --batch-config-file <your-yaml-configuration-file>
|
359
|
+
```
|
360
|
+
|
248
361
|
## Leaderboard
|
249
362
|
### Introduction
|
250
363
|
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
@@ -313,52 +426,35 @@ The standard benchmark results displayed here include all 15 cases that we curre
|
|
313
426
|
|
314
427
|
All standard benchmark results are generated by a client running on an 8 core, 32 GB host, which is located in the same region as the server being tested. The client host is equipped with an `Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz` processor. Also all the servers for the open-source systems tested in our benchmarks run on hosts with the same type of processor.
|
315
428
|
### Run Test Page
|
316
|
-

|
317
|
-
This is the page to run a test:
|
318
429
|
1. Initially, you select the systems to be tested - multiple selections are allowed. Once selected, corresponding forms will pop up to gather necessary information for using the chosen databases. The db_label is used to differentiate different instances of the same system. We recommend filling in the host size or instance type here (as we do in our standard results).
|
319
430
|
2. The next step is to select the test cases you want to perform. You can select multiple cases at once, and a form to collect corresponding parameters will appear.
|
320
431
|
3. Finally, you'll need to provide a task label to distinguish different test results. Using the same label for different tests will result in the previous results being overwritten.
|
321
432
|
Currently, only one task can be run at a time.
|
433
|
+

|
434
|
+

|
435
|
+

|
436
|
+
|
322
437
|
|
323
438
|
## Module
|
324
439
|
### Code Structure
|
325
440
|

|
326
441
|
### Client
|
327
|
-
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis,
|
442
|
+
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
|
328
443
|
### Benchmark Cases
|
329
|
-
We've developed
|
444
|
+
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
|
330
445
|
#### Capacity Case
|
331
446
|
- **Large Dim:** Tests the database's loading capacity by inserting large-dimension vectors (GIST 100K vectors, 960 dimensions) until fully loaded. The final number of inserted vectors is reported.
|
332
447
|
- **Small Dim:** Similar to the Large Dim case but uses small-dimension vectors (SIFT 500K vectors, 128 dimensions).
|
333
448
|
#### Search Performance Case
|
334
449
|
- **XLarge Dataset:** Measures search performance with a massive dataset (LAION 100M vectors, 768 dimensions) at varying parallel levels. The results include index building time, recall, latency, and maximum QPS.
|
335
|
-
- **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-768dim, 5M-1536dim).
|
336
|
-
- **Medium Dataset:** A case using a medium dataset (1M-768dim, 500K-1536dim).
|
450
|
+
- **Large Dataset:** Similar to the XLarge Dataset case, but uses a slightly smaller dataset (10M-1024dim, 10M-768dim, 5M-1536dim).
|
451
|
+
- **Medium Dataset:** A case using a medium dataset (1M-1024dim, 1M-768dim, 500K-1536dim).
|
452
|
+
- **Small Dataset:** For development (100K-768dim, 50K-1536dim).
|
337
453
|
#### Filtering Search Performance Case
|
338
|
-
- **
|
339
|
-
- **
|
340
|
-
|
341
|
-
- **
|
342
|
-
For a quick reference, here is a table summarizing the key aspects of each case:
|
343
|
-
|
344
|
-
Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
345
|
-
|----------|-----------|--------------|----------------|---------|
|
346
|
-
1 | Capacity Case | SIFT 500K vectors, 128 dimensions | N/A | Number of inserted vectors |
|
347
|
-
2 | Capacity Case | GIST 100K vectors, 960 dimensions | N/A | Number of inserted vectors |
|
348
|
-
3 | Search Performance Case | LAION 100M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
349
|
-
4 | Search Performance Case | Cohere 10M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
350
|
-
5 | Search Performance Case | Cohere 1M vectors, 768 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
351
|
-
6 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
352
|
-
7 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
353
|
-
8 | Filtering Search Performance Case | Cohere 10M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
354
|
-
9 | Filtering Search Performance Case | Cohere 1M vectors, 768 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
355
|
-
10 | Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
356
|
-
11 | Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | N/A | Index building time, recall, latency, maximum QPS |
|
357
|
-
12 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
358
|
-
13 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 1% vectors | Index building time, recall, latency, maximum QPS |
|
359
|
-
14 | Filtering Search Performance Case | OpenAI generated 500K vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
360
|
-
15 | Filtering Search Performance Case | OpenAI generated 5M vectors, 1536 dimensions | 99% vectors | Index building time, recall, latency, maximum QPS |
|
361
|
-
|
454
|
+
- **Int-Filter Cases:** Evaluates search performance with int-based filter expressions (e.g., "id >= 2,000").
|
455
|
+
- **Label-Filter Cases:** Evaluates search performance with label-based filter expressions (e.g., "color == 'red'"). The test includes randomly generated labels to simulate real-world filtering scenarios.
|
456
|
+
#### Streaming Cases
|
457
|
+
- **Insertion-Under-Load Case:** Evaluates search performance while maintaining a constant insertion workload. VectorDBBench applies a steady stream of insert requests at a fixed rate to simulate real-world scenarios where search operations must perform reliably under continuous data ingestion.
|
362
458
|
|
363
459
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
364
460
|
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -35,7 +35,7 @@ dependencies = [
|
|
35
35
|
"psutil",
|
36
36
|
"polars",
|
37
37
|
"plotly",
|
38
|
-
"environs
|
38
|
+
"environs",
|
39
39
|
"pydantic<v2",
|
40
40
|
"scikit-learn",
|
41
41
|
"pymilvus", # with pandas, numpy, ujson
|
@@ -73,6 +73,7 @@ all = [
|
|
73
73
|
"clickhouse-connect",
|
74
74
|
"pyvespa",
|
75
75
|
"lancedb",
|
76
|
+
"mysql-connector-python",
|
76
77
|
]
|
77
78
|
|
78
79
|
qdrant = [ "qdrant-client" ]
|
@@ -96,6 +97,7 @@ tidb = [ "PyMySQL" ]
|
|
96
97
|
clickhouse = [ "clickhouse-connect" ]
|
97
98
|
vespa = [ "pyvespa" ]
|
98
99
|
lancedb = [ "lancedb" ]
|
100
|
+
oceanbase = [ "mysql-connector-python" ]
|
99
101
|
|
100
102
|
[project.urls]
|
101
103
|
"repository" = "https://github.com/zilliztech/VectorDBBench"
|
@@ -18,37 +18,16 @@ class config:
|
|
18
18
|
DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
|
19
19
|
DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
|
20
20
|
NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
|
21
|
+
TIME_PER_BATCH = 1 # 1s. for streaming insertion.
|
22
|
+
MAX_INSERT_RETRY = 5
|
23
|
+
MAX_SEARCH_RETRY = 5
|
24
|
+
|
25
|
+
LOAD_MAX_TRY_COUNT = 10
|
21
26
|
|
22
27
|
DROP_OLD = env.bool("DROP_OLD", True)
|
23
28
|
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
|
24
29
|
|
25
|
-
NUM_CONCURRENCY = env.list(
|
26
|
-
"NUM_CONCURRENCY",
|
27
|
-
[
|
28
|
-
1,
|
29
|
-
5,
|
30
|
-
10,
|
31
|
-
15,
|
32
|
-
20,
|
33
|
-
25,
|
34
|
-
30,
|
35
|
-
35,
|
36
|
-
40,
|
37
|
-
45,
|
38
|
-
50,
|
39
|
-
55,
|
40
|
-
60,
|
41
|
-
65,
|
42
|
-
70,
|
43
|
-
75,
|
44
|
-
80,
|
45
|
-
85,
|
46
|
-
90,
|
47
|
-
95,
|
48
|
-
100,
|
49
|
-
],
|
50
|
-
subcast=int,
|
51
|
-
)
|
30
|
+
NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 20, 30, 40, 60, 80], subcast=int)
|
52
31
|
|
53
32
|
CONCURRENCY_DURATION = 30
|
54
33
|
|
@@ -68,6 +47,7 @@ class config:
|
|
68
47
|
|
69
48
|
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
|
70
49
|
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
|
50
|
+
LOAD_TIMEOUT_768D_100K = 24 * 3600 # 24h
|
71
51
|
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
|
72
52
|
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
|
73
53
|
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
|
@@ -75,7 +55,11 @@ class config:
|
|
75
55
|
LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
|
76
56
|
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
|
77
57
|
|
58
|
+
LOAD_TIMEOUT_1024D_1M = 24 * 3600 # 24h
|
59
|
+
LOAD_TIMEOUT_1024D_10M = 240 * 3600 # 10d
|
60
|
+
|
78
61
|
OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
|
62
|
+
OPTIMIZE_TIMEOUT_768D_100K = 24 * 3600 # 24h
|
79
63
|
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
|
80
64
|
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
|
81
65
|
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
|
@@ -83,6 +67,9 @@ class config:
|
|
83
67
|
OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
|
84
68
|
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
|
85
69
|
|
70
|
+
OPTIMIZE_TIMEOUT_1024D_1M = 24 * 3600 # 24h
|
71
|
+
OPTIMIZE_TIMEOUT_1024D_10M = 240 * 3600 # 10d
|
72
|
+
|
86
73
|
def display(self) -> str:
|
87
74
|
return [
|
88
75
|
i
|
@@ -1,7 +1,8 @@
|
|
1
1
|
import logging
|
2
2
|
|
3
|
-
from vectordb_bench.backend.clients import EmptyDBCaseConfig
|
3
|
+
from vectordb_bench.backend.clients import DB, EmptyDBCaseConfig
|
4
4
|
from vectordb_bench.backend.data_source import DatasetSource
|
5
|
+
from vectordb_bench.backend.filter import FilterOp
|
5
6
|
from vectordb_bench.models import TaskConfig
|
6
7
|
|
7
8
|
from .cases import CaseLabel
|
@@ -10,6 +11,13 @@ from .task_runner import CaseRunner, RunningStatus, TaskRunner
|
|
10
11
|
log = logging.getLogger(__name__)
|
11
12
|
|
12
13
|
|
14
|
+
class FilterNotSupportedError(ValueError):
|
15
|
+
"""Raised when a filter type is not supported by a vector database."""
|
16
|
+
|
17
|
+
def __init__(self, db_name: str, filter_type: FilterOp):
|
18
|
+
super().__init__(f"{filter_type} Filter test is not supported by {db_name}.")
|
19
|
+
|
20
|
+
|
13
21
|
class Assembler:
|
14
22
|
@classmethod
|
15
23
|
def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
|
@@ -39,25 +47,30 @@ class Assembler:
|
|
39
47
|
runners = [cls.assemble(run_id, task, source) for task in tasks]
|
40
48
|
load_runners = [r for r in runners if r.ca.label == CaseLabel.Load]
|
41
49
|
perf_runners = [r for r in runners if r.ca.label == CaseLabel.Performance]
|
50
|
+
streaming_runners = [r for r in runners if r.ca.label == CaseLabel.Streaming]
|
42
51
|
|
43
52
|
# group by db
|
44
|
-
db2runner = {}
|
53
|
+
db2runner: dict[DB, list[CaseRunner]] = {}
|
45
54
|
for r in perf_runners:
|
46
55
|
db = r.config.db
|
47
56
|
if db not in db2runner:
|
48
57
|
db2runner[db] = []
|
49
58
|
db2runner[db].append(r)
|
50
59
|
|
51
|
-
# check
|
52
|
-
for
|
53
|
-
|
60
|
+
# check
|
61
|
+
for db, runners in db2runner.items():
|
62
|
+
db_instance = db.init_cls
|
63
|
+
for runner in runners:
|
64
|
+
if not db_instance.filter_supported(runner.ca.filters):
|
65
|
+
raise FilterNotSupportedError(db.value, runner.ca.filters.type)
|
54
66
|
|
55
67
|
# sort by dataset size
|
56
68
|
for _, runner in db2runner.items():
|
57
|
-
runner.sort(key=lambda x: x.ca.dataset.data.size)
|
69
|
+
runner.sort(key=lambda x: (x.ca.dataset.data.size, 0 if x.ca.filters.type == FilterOp.StrEqual else 1))
|
58
70
|
|
59
71
|
all_runners = []
|
60
72
|
all_runners.extend(load_runners)
|
73
|
+
all_runners.extend(streaming_runners)
|
61
74
|
for v in db2runner.values():
|
62
75
|
all_runners.extend(v)
|
63
76
|
|