vectordb-bench 0.0.18__tar.gz → 0.0.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.github/workflows/pull_request.yml +4 -0
- vectordb_bench-0.0.20/Makefile +10 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/PKG-INFO +34 -42
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/README.md +19 -21
- vectordb_bench-0.0.20/pyproject.toml +209 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_rate_runner.py +3 -3
- vectordb_bench-0.0.20/vectordb_bench/__init__.py +92 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/__main__.py +4 -3
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/assembler.py +12 -13
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/cases.py +56 -46
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/__init__.py +101 -14
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/alloydb/cli.py +52 -35
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/alloydb/config.py +30 -30
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/api.py +8 -9
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/aws_opensearch/run.py +166 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/chroma/config.py +4 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/memorydb/config.py +2 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/milvus/cli.py +62 -80
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/milvus/config.py +31 -7
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/milvus/milvus.py +23 -26
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvector/config.py +63 -73
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pinecone/config.py +1 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/redis/cli.py +6 -12
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench-0.0.20/vectordb_bench/backend/clients/redis/redis.py +194 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/test/cli.py +1 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/test/config.py +2 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/test/test.py +4 -5
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/data_source.py +30 -18
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/dataset.py +47 -27
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench-0.0.20/vectordb_bench/backend/runner/__init__.py +10 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench-0.0.20/vectordb_bench/backend/runner/rate_runner.py +107 -0
- vectordb_bench-0.0.20/vectordb_bench/backend/runner/read_write_runner.py +206 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/runner/serial_runner.py +99 -50
- vectordb_bench-0.0.20/vectordb_bench/backend/runner/util.py +17 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/task_runner.py +95 -74
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/utils.py +17 -9
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/base.py +0 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/cli/cli.py +65 -60
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/cli/vectordbbench.py +6 -7
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/charts.py +8 -19
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/data.py +4 -16
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/filters.py +8 -16
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/nav.py +4 -4
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/tables/data.py +3 -6
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/pages/concurrent.py +3 -5
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/pages/custom.py +30 -9
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/pages/run_test.py +3 -7
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/utils.py +1 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/vdb_benchmark.py +4 -6
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/interface.py +56 -26
- vectordb_bench-0.0.20/vectordb_bench/log_util.py +97 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/metric.py +10 -11
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/PKG-INFO +34 -42
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/SOURCES.txt +4 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/requires.txt +14 -23
- vectordb_bench-0.0.18/.ruff.toml +0 -49
- vectordb_bench-0.0.18/Makefile +0 -2
- vectordb_bench-0.0.18/pyproject.toml +0 -91
- vectordb_bench-0.0.18/vectordb_bench/__init__.py +0 -67
- vectordb_bench-0.0.18/vectordb_bench/backend/clients/aws_opensearch/run.py +0 -156
- vectordb_bench-0.0.18/vectordb_bench/backend/clients/redis/redis.py +0 -158
- vectordb_bench-0.0.18/vectordb_bench/backend/runner/__init__.py +0 -12
- vectordb_bench-0.0.18/vectordb_bench/backend/runner/rate_runner.py +0 -79
- vectordb_bench-0.0.18/vectordb_bench/backend/runner/read_write_runner.py +0 -112
- vectordb_bench-0.0.18/vectordb_bench/backend/runner/util.py +0 -32
- vectordb_bench-0.0.18/vectordb_bench/log_util.py +0 -102
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.devcontainer/Dockerfile +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.devcontainer/devcontainer.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.env.example +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/.gitignore +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/Dockerfile +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/LICENSE +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/OWNERS +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/fig/custom_case_run_test.png +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/fig/custom_dataset.png +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/install/requirements_py3.11.txt +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/install.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/setup.cfg +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/conftest.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/pytest.ini +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_bench_runner.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_chroma.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_data_source.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_dataset.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_models.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_redis.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/test_utils.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/tests/ut_cases.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/cli/__init__.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/config-files/sample_config.yml +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/custom/custom_case.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/expanderStyle.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/check_results/headerIcon.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/autoRefresh.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/components/run_test/initStyle.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/config/dbPrices.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/config/styles.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/frontend/pages/tables.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/dbPrices.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/getLeaderboardData.py +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench/results/leaderboard.json +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/entry_points.txt +0 -0
- {vectordb_bench-0.0.18 → vectordb_bench-0.0.20}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -0,0 +1,10 @@
|
|
1
|
+
unittest:
|
2
|
+
PYTHONPATH=`pwd` python3 -m pytest tests/test_dataset.py::TestDataSet::test_download_small -svv
|
3
|
+
|
4
|
+
format:
|
5
|
+
PYTHONPATH=`pwd` python3 -m black vectordb_bench
|
6
|
+
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench --fix
|
7
|
+
|
8
|
+
lint:
|
9
|
+
PYTHONPATH=`pwd` python3 -m black vectordb_bench --check
|
10
|
+
PYTHONPATH=`pwd` python3 -m ruff check vectordb_bench
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.20
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -26,6 +26,7 @@ Requires-Dist: pydantic<v2
|
|
26
26
|
Requires-Dist: scikit-learn
|
27
27
|
Requires-Dist: pymilvus
|
28
28
|
Provides-Extra: test
|
29
|
+
Requires-Dist: black; extra == "test"
|
29
30
|
Requires-Dist: ruff; extra == "test"
|
30
31
|
Requires-Dist: pytest; extra == "test"
|
31
32
|
Provides-Extra: all
|
@@ -35,15 +36,18 @@ Requires-Dist: qdrant-client; extra == "all"
|
|
35
36
|
Requires-Dist: pinecone-client; extra == "all"
|
36
37
|
Requires-Dist: weaviate-client; extra == "all"
|
37
38
|
Requires-Dist: elasticsearch; extra == "all"
|
38
|
-
Requires-Dist: pgvector; extra == "all"
|
39
|
-
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
|
40
39
|
Requires-Dist: sqlalchemy; extra == "all"
|
41
40
|
Requires-Dist: redis; extra == "all"
|
42
41
|
Requires-Dist: chromadb; extra == "all"
|
42
|
+
Requires-Dist: pgvector; extra == "all"
|
43
43
|
Requires-Dist: psycopg; extra == "all"
|
44
44
|
Requires-Dist: psycopg-binary; extra == "all"
|
45
|
-
Requires-Dist:
|
46
|
-
Requires-Dist: opensearch-
|
45
|
+
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
|
46
|
+
Requires-Dist: opensearch-dsl; extra == "all"
|
47
|
+
Requires-Dist: opensearch-py; extra == "all"
|
48
|
+
Requires-Dist: memorydb; extra == "all"
|
49
|
+
Requires-Dist: alibabacloud_ha3engine_vector; extra == "all"
|
50
|
+
Requires-Dist: alibabacloud_searchengine20211025; extra == "all"
|
47
51
|
Provides-Extra: qdrant
|
48
52
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
49
53
|
Provides-Extra: pinecone
|
@@ -56,18 +60,6 @@ Provides-Extra: pgvector
|
|
56
60
|
Requires-Dist: psycopg; extra == "pgvector"
|
57
61
|
Requires-Dist: psycopg-binary; extra == "pgvector"
|
58
62
|
Requires-Dist: pgvector; extra == "pgvector"
|
59
|
-
Provides-Extra: pgvectorscale
|
60
|
-
Requires-Dist: psycopg; extra == "pgvectorscale"
|
61
|
-
Requires-Dist: psycopg-binary; extra == "pgvectorscale"
|
62
|
-
Requires-Dist: pgvector; extra == "pgvectorscale"
|
63
|
-
Provides-Extra: pgdiskann
|
64
|
-
Requires-Dist: psycopg; extra == "pgdiskann"
|
65
|
-
Requires-Dist: psycopg-binary; extra == "pgdiskann"
|
66
|
-
Requires-Dist: pgvector; extra == "pgdiskann"
|
67
|
-
Provides-Extra: alloydb
|
68
|
-
Requires-Dist: psycopg; extra == "alloydb"
|
69
|
-
Requires-Dist: psycopg-binary; extra == "alloydb"
|
70
|
-
Requires-Dist: pgvector; extra == "alloydb"
|
71
63
|
Provides-Extra: pgvecto-rs
|
72
64
|
Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
|
73
65
|
Provides-Extra: redis
|
@@ -76,15 +68,27 @@ Provides-Extra: memorydb
|
|
76
68
|
Requires-Dist: memorydb; extra == "memorydb"
|
77
69
|
Provides-Extra: chromadb
|
78
70
|
Requires-Dist: chromadb; extra == "chromadb"
|
79
|
-
Provides-Extra:
|
80
|
-
Requires-Dist:
|
81
|
-
Provides-Extra:
|
71
|
+
Provides-Extra: opensearch
|
72
|
+
Requires-Dist: opensearch-py; extra == "opensearch"
|
73
|
+
Provides-Extra: aliyun-opensearch
|
74
|
+
Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
|
75
|
+
Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
|
82
76
|
|
83
77
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
84
78
|
|
85
79
|
[](https://pypi.org/project/vectordb-bench/)
|
86
80
|
[](https://pepy.tech/project/vectordb-bench)
|
87
81
|
|
82
|
+
## What is VectorDBBench
|
83
|
+
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
84
|
+
|
85
|
+
Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks with ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
|
86
|
+
To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
|
87
|
+
|
88
|
+
Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and a dataset generated by OpenAI from an opensource [raw dataset](https://huggingface.co/datasets/allenai/c4). It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
|
89
|
+
|
90
|
+
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
91
|
+
|
88
92
|
**Leaderboard:** https://zilliz.com/benchmark
|
89
93
|
## Quick Start
|
90
94
|
### Prerequisites
|
@@ -111,21 +115,19 @@ All the database client supported
|
|
111
115
|
|
112
116
|
| Optional database client | install command |
|
113
117
|
|--------------------------|---------------------------------------------|
|
114
|
-
| pymilvus(*default*)
|
115
|
-
| all
|
118
|
+
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
|
119
|
+
| all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
|
116
120
|
| qdrant | `pip install vectordb-bench[qdrant]` |
|
117
121
|
| pinecone | `pip install vectordb-bench[pinecone]` |
|
118
122
|
| weaviate | `pip install vectordb-bench[weaviate]` |
|
119
|
-
| elastic
|
120
|
-
| pgvector
|
123
|
+
| elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
|
124
|
+
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
|
121
125
|
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
122
|
-
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
|
123
|
-
| pgdiskann | `pip install vectordb-bench[pgdiskann]` |
|
124
126
|
| redis | `pip install vectordb-bench[redis]` |
|
125
127
|
| memorydb | `pip install vectordb-bench[memorydb]` |
|
126
128
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
127
|
-
| awsopensearch | `pip install vectordb-bench[
|
128
|
-
|
|
129
|
+
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
130
|
+
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
129
131
|
|
130
132
|
### Run
|
131
133
|
|
@@ -264,16 +266,6 @@ milvushnsw:
|
|
264
266
|
> - Options passed on the command line will override the configuration file*
|
265
267
|
> - Parameter names use an _ not -
|
266
268
|
|
267
|
-
## What is VectorDBBench
|
268
|
-
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
269
|
-
|
270
|
-
Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
|
271
|
-
To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
|
272
|
-
|
273
|
-
Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and a dataset generated by OpenAI from an opensource [raw dataset](https://huggingface.co/datasets/allenai/c4). It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
|
274
|
-
|
275
|
-
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
276
|
-
|
277
269
|
## Leaderboard
|
278
270
|
### Introduction
|
279
271
|
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
@@ -324,13 +316,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the
|
|
324
316
|
|
325
317
|
### Check coding styles
|
326
318
|
```shell
|
327
|
-
$
|
319
|
+
$ make lint
|
328
320
|
```
|
329
321
|
|
330
|
-
|
322
|
+
To fix the coding styles automatically
|
331
323
|
|
332
324
|
```shell
|
333
|
-
$
|
325
|
+
$ make format
|
334
326
|
```
|
335
327
|
|
336
328
|
## How does it work?
|
@@ -3,6 +3,16 @@
|
|
3
3
|
[](https://pypi.org/project/vectordb-bench/)
|
4
4
|
[](https://pepy.tech/project/vectordb-bench)
|
5
5
|
|
6
|
+
## What is VectorDBBench
|
7
|
+
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
8
|
+
|
9
|
+
Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks with ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
|
10
|
+
To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
|
11
|
+
|
12
|
+
Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and a dataset generated by OpenAI from an opensource [raw dataset](https://huggingface.co/datasets/allenai/c4). It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
|
13
|
+
|
14
|
+
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
15
|
+
|
6
16
|
**Leaderboard:** https://zilliz.com/benchmark
|
7
17
|
## Quick Start
|
8
18
|
### Prerequisites
|
@@ -29,21 +39,19 @@ All the database client supported
|
|
29
39
|
|
30
40
|
| Optional database client | install command |
|
31
41
|
|--------------------------|---------------------------------------------|
|
32
|
-
| pymilvus(*default*)
|
33
|
-
| all
|
42
|
+
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
|
43
|
+
| all (*client requirements might conflict with each other*) | `pip install vectordb-bench[all]` |
|
34
44
|
| qdrant | `pip install vectordb-bench[qdrant]` |
|
35
45
|
| pinecone | `pip install vectordb-bench[pinecone]` |
|
36
46
|
| weaviate | `pip install vectordb-bench[weaviate]` |
|
37
|
-
| elastic
|
38
|
-
| pgvector
|
47
|
+
| elastic, aliyun_elasticsearch| `pip install vectordb-bench[elastic]` |
|
48
|
+
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install vectordb-bench[pgvector]` |
|
39
49
|
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
40
|
-
| pgvectorscale | `pip install vectordb-bench[pgvectorscale]` |
|
41
|
-
| pgdiskann | `pip install vectordb-bench[pgdiskann]` |
|
42
50
|
| redis | `pip install vectordb-bench[redis]` |
|
43
51
|
| memorydb | `pip install vectordb-bench[memorydb]` |
|
44
52
|
| chromadb | `pip install vectordb-bench[chromadb]` |
|
45
|
-
| awsopensearch | `pip install vectordb-bench[
|
46
|
-
|
|
53
|
+
| awsopensearch | `pip install vectordb-bench[opensearch]` |
|
54
|
+
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
47
55
|
|
48
56
|
### Run
|
49
57
|
|
@@ -182,16 +190,6 @@ milvushnsw:
|
|
182
190
|
> - Options passed on the command line will override the configuration file
|
183
191
|
> - Parameter names use an _ not -
|
184
192
|
|
185
|
-
## What is VectorDBBench
|
186
|
-
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
187
|
-
|
188
|
-
Understanding the importance of user experience, we provide an intuitive visual interface. This not only empowers users to initiate benchmarks at ease, but also to view comparative result reports, thereby reproducing benchmark results effortlessly.
|
189
|
-
To add more relevance and practicality, we provide cost-effectiveness reports particularly for cloud services. This allows for a more realistic and applicable benchmarking process.
|
190
|
-
|
191
|
-
Closely mimicking real-world production environments, we've set up diverse testing scenarios including insertion, searching, and filtered searching. To provide you with credible and reliable data, we've included public datasets from actual production scenarios, such as [SIFT](http://corpus-texmex.irisa.fr/), [GIST](http://corpus-texmex.irisa.fr/), [Cohere](https://huggingface.co/datasets/Cohere/wikipedia-22-12/tree/main/en), and a dataset generated by OpenAI from an opensource [raw dataset](https://huggingface.co/datasets/allenai/c4). It's fascinating to discover how a relatively unknown open-source database might excel in certain circumstances!
|
192
|
-
|
193
|
-
Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
|
194
|
-
|
195
193
|
## Leaderboard
|
196
194
|
### Introduction
|
197
195
|
To facilitate the presentation of test results and provide a comprehensive performance analysis report, we offer a [leaderboard page](https://zilliz.com/benchmark). It allows us to choose from QPS, QP$, and latency metrics, and provides a comprehensive assessment of a system's performance based on the test results of various cases and a set of scoring mechanisms (to be introduced later). On this leaderboard, we can select the systems and models to be compared, and filter out cases we do not want to consider. Comprehensive scores are always ranked from best to worst, and the specific test results of each query will be presented in the list below.
|
@@ -242,13 +240,13 @@ After reopen the repository in container, run `python -m vectordb_bench` in the
|
|
242
240
|
|
243
241
|
### Check coding styles
|
244
242
|
```shell
|
245
|
-
$
|
243
|
+
$ make lint
|
246
244
|
```
|
247
245
|
|
248
|
-
|
246
|
+
To fix the coding styles automatically
|
249
247
|
|
250
248
|
```shell
|
251
|
-
$
|
249
|
+
$ make format
|
252
250
|
```
|
253
251
|
|
254
252
|
## How does it work?
|
@@ -0,0 +1,209 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=67.0", "wheel", "setuptools_scm[toml]>=6.2"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[tool.setuptools.package-data]
|
6
|
+
"vectordb_bench.results" = ["*.json"]
|
7
|
+
|
8
|
+
[tool.setuptools.packages.find]
|
9
|
+
where = ["."]
|
10
|
+
include = ["vectordb_bench", "vectordb_bench.cli"]
|
11
|
+
|
12
|
+
[project]
|
13
|
+
name = "vectordb-bench"
|
14
|
+
authors = [
|
15
|
+
{name="XuanYang-cn", email="xuan.yang@zilliz.com"},
|
16
|
+
]
|
17
|
+
description = "VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze."
|
18
|
+
|
19
|
+
readme = "README.md"
|
20
|
+
requires-python = ">=3.11"
|
21
|
+
classifiers = [
|
22
|
+
"Programming Language :: Python :: 3",
|
23
|
+
"License :: OSI Approved :: MIT License",
|
24
|
+
"Operating System :: OS Independent",
|
25
|
+
]
|
26
|
+
dependencies = [
|
27
|
+
"click",
|
28
|
+
"pytz",
|
29
|
+
"streamlit-autorefresh",
|
30
|
+
"streamlit!=1.34.0",
|
31
|
+
"streamlit_extras",
|
32
|
+
"tqdm",
|
33
|
+
"s3fs",
|
34
|
+
"oss2",
|
35
|
+
"psutil",
|
36
|
+
"polars",
|
37
|
+
"plotly",
|
38
|
+
"environs",
|
39
|
+
"pydantic<v2",
|
40
|
+
"scikit-learn",
|
41
|
+
"pymilvus", # with pandas, numpy, ujson
|
42
|
+
]
|
43
|
+
dynamic = ["version"]
|
44
|
+
|
45
|
+
[project.optional-dependencies]
|
46
|
+
test = [
|
47
|
+
"black",
|
48
|
+
"ruff",
|
49
|
+
"pytest",
|
50
|
+
]
|
51
|
+
|
52
|
+
all = [
|
53
|
+
"grpcio==1.53.0", # for qdrant-client and pymilvus
|
54
|
+
"grpcio-tools==1.53.0", # for qdrant-client and pymilvus
|
55
|
+
"qdrant-client",
|
56
|
+
"pinecone-client",
|
57
|
+
"weaviate-client",
|
58
|
+
"elasticsearch",
|
59
|
+
"sqlalchemy",
|
60
|
+
"redis",
|
61
|
+
"chromadb",
|
62
|
+
"pgvector",
|
63
|
+
"psycopg",
|
64
|
+
"psycopg-binary",
|
65
|
+
"pgvecto_rs[psycopg3]>=0.2.2",
|
66
|
+
"opensearch-dsl",
|
67
|
+
"opensearch-py",
|
68
|
+
"memorydb",
|
69
|
+
"alibabacloud_ha3engine_vector",
|
70
|
+
"alibabacloud_searchengine20211025",
|
71
|
+
]
|
72
|
+
|
73
|
+
qdrant = [ "qdrant-client" ]
|
74
|
+
pinecone = [ "pinecone-client" ]
|
75
|
+
weaviate = [ "weaviate-client" ]
|
76
|
+
elastic = [ "elasticsearch" ]
|
77
|
+
# For elastic and aliyun_elasticsearch
|
78
|
+
|
79
|
+
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
|
80
|
+
# for pgvector, pgvectorscale, pgdiskann, and alloydb
|
81
|
+
|
82
|
+
pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
|
83
|
+
redis = [ "redis" ]
|
84
|
+
memorydb = [ "memorydb" ]
|
85
|
+
chromadb = [ "chromadb" ]
|
86
|
+
opensearch = [ "opensearch-py" ]
|
87
|
+
aliyun_opensearch = [ "alibabacloud_ha3engine_vector", "alibabacloud_searchengine20211025"]
|
88
|
+
|
89
|
+
[project.urls]
|
90
|
+
"repository" = "https://github.com/zilliztech/VectorDBBench"
|
91
|
+
|
92
|
+
[project.scripts]
|
93
|
+
init_bench = "vectordb_bench.__main__:main"
|
94
|
+
vectordbbench = "vectordb_bench.cli.vectordbbench:cli"
|
95
|
+
|
96
|
+
[tool.setuptools_scm]
|
97
|
+
|
98
|
+
[tool.black]
|
99
|
+
line-length = 120
|
100
|
+
target-version = ['py311']
|
101
|
+
include = '\.pyi?$'
|
102
|
+
|
103
|
+
[tool.ruff]
|
104
|
+
lint.select = [
|
105
|
+
"E",
|
106
|
+
"F",
|
107
|
+
"C90",
|
108
|
+
"I",
|
109
|
+
"N",
|
110
|
+
"B", "C", "G",
|
111
|
+
"A",
|
112
|
+
"ANN001",
|
113
|
+
"S", "T", "W", "ARG", "BLE", "COM", "DJ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"
|
114
|
+
]
|
115
|
+
lint.ignore = [
|
116
|
+
"BLE001", # blind-except (BLE001)
|
117
|
+
"SLF001", # SLF001 Private member accessed [E]
|
118
|
+
"TRY003", # [ruff] TRY003 Avoid specifying long messages outside the exception class [E]
|
119
|
+
"FBT001", "FBT002", "FBT003",
|
120
|
+
"G004", # [ruff] G004 Logging statement uses f-string [E]
|
121
|
+
"UP031",
|
122
|
+
"RUF012",
|
123
|
+
"EM101",
|
124
|
+
"N805",
|
125
|
+
"ARG002",
|
126
|
+
"ARG003",
|
127
|
+
"PIE796", # https://github.com/zilliztech/VectorDBBench/issues/438
|
128
|
+
"INP001", # TODO
|
129
|
+
"TID252", # TODO
|
130
|
+
"N801", "N802", "N815",
|
131
|
+
"S101", "S108", "S603", "S311",
|
132
|
+
"PLR2004",
|
133
|
+
"RUF017",
|
134
|
+
"C416",
|
135
|
+
"PLW0603",
|
136
|
+
]
|
137
|
+
|
138
|
+
# Allow autofix for all enabled rules (when `--fix` is provided).
|
139
|
+
lint.fixable = [
|
140
|
+
"A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W",
|
141
|
+
"ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT",
|
142
|
+
"ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH",
|
143
|
+
"PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP",
|
144
|
+
"YTT",
|
145
|
+
]
|
146
|
+
lint.unfixable = []
|
147
|
+
|
148
|
+
show-fixes = true
|
149
|
+
|
150
|
+
# Exclude a variety of commonly ignored directories.
|
151
|
+
exclude = [
|
152
|
+
".bzr",
|
153
|
+
".direnv",
|
154
|
+
".eggs",
|
155
|
+
".git",
|
156
|
+
".git-rewrite",
|
157
|
+
".hg",
|
158
|
+
".mypy_cache",
|
159
|
+
".nox",
|
160
|
+
".pants.d",
|
161
|
+
".pytype",
|
162
|
+
".ruff_cache",
|
163
|
+
".svn",
|
164
|
+
".tox",
|
165
|
+
".venv",
|
166
|
+
"__pypackages__",
|
167
|
+
"_build",
|
168
|
+
"buck-out",
|
169
|
+
"build",
|
170
|
+
"dist",
|
171
|
+
"node_modules",
|
172
|
+
"venv",
|
173
|
+
"grpc_gen",
|
174
|
+
"__pycache__",
|
175
|
+
"frontend", # TODO
|
176
|
+
"tests",
|
177
|
+
]
|
178
|
+
|
179
|
+
# Same as Black.
|
180
|
+
line-length = 120
|
181
|
+
|
182
|
+
# Allow unused variables when underscore-prefixed.
|
183
|
+
lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
184
|
+
|
185
|
+
# Assume Python 3.11
|
186
|
+
target-version = "py311"
|
187
|
+
|
188
|
+
[tool.ruff.lint.mccabe]
|
189
|
+
# Maximum allowed McCabe complexity; raised from Ruff's default of 10.
|
190
|
+
max-complexity = 18
|
191
|
+
|
192
|
+
[tool.ruff.lint.pycodestyle]
|
193
|
+
max-line-length = 120
|
194
|
+
max-doc-length = 120
|
195
|
+
|
196
|
+
[tool.ruff.lint.pylint]
|
197
|
+
max-args = 20
|
198
|
+
max-branches = 15
|
199
|
+
|
200
|
+
[tool.ruff.lint.flake8-builtins]
|
201
|
+
builtins-ignorelist = [
|
202
|
+
# "format",
|
203
|
+
# "next",
|
204
|
+
# "object", # TODO
|
205
|
+
# "id",
|
206
|
+
# "dict", # TODO
|
207
|
+
# "filter",
|
208
|
+
]
|
209
|
+
|
@@ -52,9 +52,9 @@ def test_read_write_runner(db, insert_rate, conc: list, search_stage: Iterable[f
|
|
52
52
|
|
53
53
|
def get_db(db: str, config: dict) -> VectorDB:
|
54
54
|
if db == DB.Milvus.name:
|
55
|
-
return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True
|
55
|
+
return DB.Milvus.init_cls(dim=768, db_config=config, db_case_config=FLATConfig(metric_type="COSINE"), drop_old=True)
|
56
56
|
elif db == DB.ZillizCloud.name:
|
57
|
-
return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True
|
57
|
+
return DB.ZillizCloud.init_cls(dim=768, db_config=config, db_case_config=AutoIndexConfig(metric_type="COSINE"), drop_old=True)
|
58
58
|
else:
|
59
59
|
raise ValueError(f"unknown db: {db}")
|
60
60
|
|
@@ -76,7 +76,7 @@ if __name__ == "__main__":
|
|
76
76
|
}
|
77
77
|
|
78
78
|
conc = (1, 15, 50)
|
79
|
-
search_stage = (0.5, 0.6, 0.7, 0.8, 0.9
|
79
|
+
search_stage = (0.5, 0.6, 0.7, 0.8, 0.9)
|
80
80
|
|
81
81
|
db = get_db(flags.db, config)
|
82
82
|
test_read_write_runner(
|
@@ -0,0 +1,92 @@
|
|
1
|
+
import inspect
|
2
|
+
import pathlib
|
3
|
+
|
4
|
+
import environs
|
5
|
+
|
6
|
+
from . import log_util
|
7
|
+
|
8
|
+
env = environs.Env()
|
9
|
+
env.read_env(".env", False)
|
10
|
+
|
11
|
+
|
12
|
+
class config:
|
13
|
+
ALIYUN_OSS_URL = "assets.zilliz.com.cn/benchmark/"
|
14
|
+
AWS_S3_URL = "assets.zilliz.com/benchmark/"
|
15
|
+
|
16
|
+
LOG_LEVEL = env.str("LOG_LEVEL", "INFO")
|
17
|
+
|
18
|
+
DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
|
19
|
+
DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
|
20
|
+
NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
|
21
|
+
|
22
|
+
DROP_OLD = env.bool("DROP_OLD", True)
|
23
|
+
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
|
24
|
+
|
25
|
+
NUM_CONCURRENCY = env.list(
|
26
|
+
"NUM_CONCURRENCY",
|
27
|
+
[
|
28
|
+
1,
|
29
|
+
5,
|
30
|
+
10,
|
31
|
+
15,
|
32
|
+
20,
|
33
|
+
25,
|
34
|
+
30,
|
35
|
+
35,
|
36
|
+
40,
|
37
|
+
45,
|
38
|
+
50,
|
39
|
+
55,
|
40
|
+
60,
|
41
|
+
65,
|
42
|
+
70,
|
43
|
+
75,
|
44
|
+
80,
|
45
|
+
85,
|
46
|
+
90,
|
47
|
+
95,
|
48
|
+
100,
|
49
|
+
],
|
50
|
+
subcast=int,
|
51
|
+
)
|
52
|
+
|
53
|
+
CONCURRENCY_DURATION = 30
|
54
|
+
|
55
|
+
RESULTS_LOCAL_DIR = env.path(
|
56
|
+
"RESULTS_LOCAL_DIR",
|
57
|
+
pathlib.Path(__file__).parent.joinpath("results"),
|
58
|
+
)
|
59
|
+
CONFIG_LOCAL_DIR = env.path(
|
60
|
+
"CONFIG_LOCAL_DIR",
|
61
|
+
pathlib.Path(__file__).parent.joinpath("config-files"),
|
62
|
+
)
|
63
|
+
|
64
|
+
K_DEFAULT = 100 # default return top k nearest neighbors during search
|
65
|
+
CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
|
66
|
+
|
67
|
+
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
|
68
|
+
LOAD_TIMEOUT_DEFAULT = 24 * 3600 # 24h
|
69
|
+
LOAD_TIMEOUT_768D_1M = 24 * 3600 # 24h
|
70
|
+
LOAD_TIMEOUT_768D_10M = 240 * 3600 # 10d
|
71
|
+
LOAD_TIMEOUT_768D_100M = 2400 * 3600 # 100d
|
72
|
+
|
73
|
+
LOAD_TIMEOUT_1536D_500K = 24 * 3600 # 24h
|
74
|
+
LOAD_TIMEOUT_1536D_5M = 240 * 3600 # 10d
|
75
|
+
|
76
|
+
OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600 # 24h
|
77
|
+
OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600 # 24h
|
78
|
+
OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600 # 10d
|
79
|
+
OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600 # 100d
|
80
|
+
|
81
|
+
OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600 # 24h
|
82
|
+
OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600 # 10d
|
83
|
+
|
84
|
+
def display(self) -> str:
|
85
|
+
return [
|
86
|
+
i
|
87
|
+
for i in inspect.getmembers(self)
|
88
|
+
if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
|
89
|
+
]
|
90
|
+
|
91
|
+
|
92
|
+
log_util.init(config.LOG_LEVEL)
|
@@ -1,7 +1,8 @@
|
|
1
|
-
import traceback
|
2
1
|
import logging
|
2
|
+
import pathlib
|
3
3
|
import subprocess
|
4
|
-
import
|
4
|
+
import traceback
|
5
|
+
|
5
6
|
from . import config
|
6
7
|
|
7
8
|
log = logging.getLogger("vectordb_bench")
|
@@ -16,7 +17,7 @@ def run_streamlit():
|
|
16
17
|
cmd = [
|
17
18
|
"streamlit",
|
18
19
|
"run",
|
19
|
-
f"{
|
20
|
+
f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
|
20
21
|
"--logger.level",
|
21
22
|
"info",
|
22
23
|
"--theme.base",
|
@@ -1,24 +1,25 @@
|
|
1
|
-
from .cases import CaseLabel
|
2
|
-
from .task_runner import CaseRunner, RunningStatus, TaskRunner
|
3
|
-
from ..models import TaskConfig
|
4
|
-
from ..backend.clients import EmptyDBCaseConfig
|
5
|
-
from ..backend.data_source import DatasetSource
|
6
1
|
import logging
|
7
2
|
|
3
|
+
from vectordb_bench.backend.clients import EmptyDBCaseConfig
|
4
|
+
from vectordb_bench.backend.data_source import DatasetSource
|
5
|
+
from vectordb_bench.models import TaskConfig
|
6
|
+
|
7
|
+
from .cases import CaseLabel
|
8
|
+
from .task_runner import CaseRunner, RunningStatus, TaskRunner
|
8
9
|
|
9
10
|
log = logging.getLogger(__name__)
|
10
11
|
|
11
12
|
|
12
13
|
class Assembler:
|
13
14
|
@classmethod
|
14
|
-
def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
|
15
|
+
def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
|
15
16
|
c_cls = task.case_config.case_id.case_cls
|
16
17
|
|
17
18
|
c = c_cls(task.case_config.custom_case)
|
18
|
-
if type(task.db_case_config)
|
19
|
+
if type(task.db_case_config) is not EmptyDBCaseConfig:
|
19
20
|
task.db_case_config.metric_type = c.dataset.data.metric_type
|
20
21
|
|
21
|
-
|
22
|
+
return CaseRunner(
|
22
23
|
run_id=run_id,
|
23
24
|
config=task,
|
24
25
|
ca=c,
|
@@ -26,8 +27,6 @@ class Assembler:
|
|
26
27
|
dataset_source=source,
|
27
28
|
)
|
28
29
|
|
29
|
-
return runner
|
30
|
-
|
31
30
|
@classmethod
|
32
31
|
def assemble_all(
|
33
32
|
cls,
|
@@ -50,12 +49,12 @@ class Assembler:
|
|
50
49
|
db2runner[db].append(r)
|
51
50
|
|
52
51
|
# check dbclient installed
|
53
|
-
for k in db2runner
|
52
|
+
for k in db2runner:
|
54
53
|
_ = k.init_cls
|
55
54
|
|
56
55
|
# sort by dataset size
|
57
|
-
for k in db2runner
|
58
|
-
db2runner[k].sort(key=lambda x:x.ca.dataset.data.size)
|
56
|
+
for k, _ in db2runner:
|
57
|
+
db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
|
59
58
|
|
60
59
|
all_runners = []
|
61
60
|
all_runners.extend(load_runners)
|