vectordb-bench 0.0.10__tar.gz → 0.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.env.example +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/PKG-INFO +228 -14
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/README.md +218 -12
- vectordb_bench-0.0.12/fig/custom_case_run_test.png +0 -0
- vectordb_bench-0.0.12/fig/custom_dataset.png +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/pyproject.toml +9 -2
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/__init__.py +19 -5
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/assembler.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/cases.py +93 -27
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/__init__.py +14 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +159 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/cli.py +44 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/config.py +58 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/aws_opensearch/run.py +125 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/milvus/cli.py +291 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/milvus.py +13 -6
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/pgvector/cli.py +116 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/config.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/redis/cli.py +74 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/cli.py +25 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/config.py +18 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/test/test.py +62 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
- vectordb_bench-0.0.12/vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/dataset.py +27 -5
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/mp_runner.py +14 -3
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/serial_runner.py +7 -3
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/task_runner.py +76 -26
- vectordb_bench-0.0.12/vectordb_bench/cli/__init__.py +0 -0
- vectordb_bench-0.0.12/vectordb_bench/cli/cli.py +362 -0
- vectordb_bench-0.0.12/vectordb_bench/cli/vectordbbench.py +22 -0
- vectordb_bench-0.0.12/vectordb_bench/config-files/sample_config.yml +17 -0
- vectordb_bench-0.0.12/vectordb_bench/custom/custom_case.json +18 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/charts.py +6 -6
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/data.py +23 -20
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/expanderStyle.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/filters.py +20 -13
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/headerIcon.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/priceTable.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/stPageConfig.py +1 -1
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/concurrent/charts.py +79 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displayCustomCase.py +31 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/displaypPrams.py +11 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/getCustomConfig.py +40 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/custom/initStyle.py +15 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/autoRefresh.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/caseSelector.py +40 -28
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -5
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/dbSelector.py +8 -14
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/generateTasks.py +3 -5
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/run_test/initStyle.py +14 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/submitTask.py +13 -5
- vectordb_bench-0.0.12/vectordb_bench/frontend/components/tables/data.py +44 -0
- {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbCaseConfigs.py +140 -32
- {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/styles.py +2 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/pages/concurrent.py +65 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/pages/custom.py +64 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/quries_per_dollar.py +5 -5
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/pages/run_test.py +4 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/pages/tables.py +24 -0
- vectordb_bench-0.0.12/vectordb_bench/frontend/utils.py +22 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/vdb_benchmark.py +3 -3
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/interface.py +21 -25
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/metric.py +23 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/models.py +45 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/getLeaderboardData.py +1 -1
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/PKG-INFO +228 -14
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/SOURCES.txt +32 -3
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/entry_points.txt +1 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/requires.txt +11 -1
- vectordb_bench-0.0.10/vectordb_bench/frontend/utils.py +0 -6
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.devcontainer/Dockerfile +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.devcontainer/devcontainer.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.github/workflows/publish_package_on_release.yml +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.github/workflows/pull_request.yml +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.gitignore +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/.ruff.toml +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/Dockerfile +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/LICENSE +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/Makefile +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/OWNERS +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/install/requirements_py3.11.txt +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/install.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/setup.cfg +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/conftest.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/pytest.ini +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_bench_runner.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_chroma.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_data_source.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_dataset.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_elasticsearch_cloud.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_models.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_redis.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/test_utils.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/tests/ut_cases.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/__main__.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/__init__.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/chroma.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/chroma/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/milvus/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/pinecone/pinecone.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/redis/redis.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/config.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/data_source.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/result_collector.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/runner/__init__.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/backend/utils.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/base.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/footer.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/check_results/nav.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/get_results/saveAsImage.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/frontend/components/run_test/hideSidebar.py +0 -0
- {vectordb_bench-0.0.10/vectordb_bench/frontend/const → vectordb_bench-0.0.12/vectordb_bench/frontend/config}/dbPrices.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/log_util.py +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230727_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ElasticCloud/result_20230808_standard_elasticcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230727_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/Pinecone/result_20230808_standard_pinecone.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230727_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/QdrantCloud/result_20230808_standard_qdrantcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230727_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/dbPrices.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench/results/leaderboard.json +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/dependency_links.txt +0 -0
- {vectordb_bench-0.0.10 → vectordb_bench-0.0.12}/vectordb_bench.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.10
|
3
|
+
Version: 0.0.12
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.11
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
+
Requires-Dist: click
|
13
14
|
Requires-Dist: pytz
|
14
15
|
Requires-Dist: streamlit-autorefresh
|
15
16
|
Requires-Dist: streamlit!=1.34.0
|
@@ -40,6 +41,9 @@ Requires-Dist: redis; extra == "all"
|
|
40
41
|
Requires-Dist: chromadb; extra == "all"
|
41
42
|
Requires-Dist: psycopg2; extra == "all"
|
42
43
|
Requires-Dist: psycopg; extra == "all"
|
44
|
+
Requires-Dist: psycopg-binary; extra == "all"
|
45
|
+
Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
|
46
|
+
Requires-Dist: opensearch-py==2.6.0; extra == "all"
|
43
47
|
Provides-Extra: qdrant
|
44
48
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
45
49
|
Provides-Extra: pinecone
|
@@ -49,14 +53,18 @@ Requires-Dist: weaviate-client; extra == "weaviate"
|
|
49
53
|
Provides-Extra: elastic
|
50
54
|
Requires-Dist: elasticsearch; extra == "elastic"
|
51
55
|
Provides-Extra: pgvector
|
52
|
-
Requires-Dist: pgvector; extra == "pgvector"
|
53
56
|
Requires-Dist: psycopg; extra == "pgvector"
|
57
|
+
Requires-Dist: psycopg-binary; extra == "pgvector"
|
58
|
+
Requires-Dist: pgvector; extra == "pgvector"
|
54
59
|
Provides-Extra: pgvecto-rs
|
55
60
|
Requires-Dist: psycopg2; extra == "pgvecto-rs"
|
56
61
|
Provides-Extra: redis
|
57
62
|
Requires-Dist: redis; extra == "redis"
|
58
63
|
Provides-Extra: chromadb
|
59
64
|
Requires-Dist: chromadb; extra == "chromadb"
|
65
|
+
Provides-Extra: awsopensearch
|
66
|
+
Requires-Dist: awsopensearch; extra == "awsopensearch"
|
67
|
+
Provides-Extra: zilliz-cloud
|
60
68
|
|
61
69
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
62
70
|
|
@@ -87,24 +95,134 @@ pip install vectordb-bench[pinecone]
|
|
87
95
|
```
|
88
96
|
All supported database clients:
|
89
97
|
|
90
|
-
|Optional database client|install command|
|
91
|
-
|
92
|
-
|pymilvus(*default*)|`pip install vectordb-bench`|
|
93
|
-
|all|`pip install vectordb-bench[all]`|
|
94
|
-
|qdrant|`pip install vectordb-bench[qdrant]`|
|
95
|
-
|pinecone|`pip install vectordb-bench[pinecone]`|
|
96
|
-
|weaviate|`pip install vectordb-bench[weaviate]`|
|
97
|
-
|elastic|`pip install vectordb-bench[elastic]`|
|
98
|
-
|pgvector|`pip install vectordb-bench[pgvector]`|
|
99
|
-
|pgvecto.rs|`pip install vectordb-bench[pgvecto_rs]`|
|
100
|
-
|redis|`pip install vectordb-bench[redis]`|
|
101
|
-
|chromadb|`pip install vectordb-bench[chromadb]`|
|
98
|
+
| Optional database client | install command |
|
99
|
+
|--------------------------|---------------------------------------------|
|
100
|
+
| pymilvus(*default*) | `pip install vectordb-bench` |
|
101
|
+
| all | `pip install vectordb-bench[all]` |
|
102
|
+
| qdrant | `pip install vectordb-bench[qdrant]` |
|
103
|
+
| pinecone | `pip install vectordb-bench[pinecone]` |
|
104
|
+
| weaviate | `pip install vectordb-bench[weaviate]` |
|
105
|
+
| elastic | `pip install vectordb-bench[elastic]` |
|
106
|
+
| pgvector | `pip install vectordb-bench[pgvector]` |
|
107
|
+
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
108
|
+
| redis | `pip install vectordb-bench[redis]` |
|
109
|
+
| chromadb | `pip install vectordb-bench[chromadb]` |
|
110
|
+
| awsopensearch | `pip install vectordb-bench[awsopensearch]` |
|
102
111
|
|
103
112
|
### Run
|
104
113
|
|
105
114
|
``` shell
|
106
115
|
init_bench
|
107
116
|
```
|
117
|
+
|
118
|
+
OR:
|
119
|
+
|
120
|
+
### Run from the command line.
|
121
|
+
|
122
|
+
``` shell
|
123
|
+
vectordbbench [OPTIONS] COMMAND [ARGS]...
|
124
|
+
```
|
125
|
+
To list the clients that can be run from the command line, execute: `vectordbbench --help`
|
126
|
+
``` text
|
127
|
+
$ vectordbbench --help
|
128
|
+
Usage: vectordbbench [OPTIONS] COMMAND [ARGS]...
|
129
|
+
|
130
|
+
Options:
|
131
|
+
--help Show this message and exit.
|
132
|
+
|
133
|
+
Commands:
|
134
|
+
pgvectorhnsw
|
135
|
+
pgvectorivfflat
|
136
|
+
test
|
137
|
+
weaviate
|
138
|
+
```
|
139
|
+
To list the options for each command, execute `vectordbbench [command] --help`
|
140
|
+
|
141
|
+
```text
|
142
|
+
$ vectordbbench pgvectorhnsw --help
|
143
|
+
Usage: vectordbbench pgvectorhnsw [OPTIONS]
|
144
|
+
|
145
|
+
Options:
|
146
|
+
--config-file PATH Read configuration from yaml file
|
147
|
+
--drop-old / --skip-drop-old Drop old or skip [default: drop-old]
|
148
|
+
--load / --skip-load Load or skip [default: load]
|
149
|
+
--search-serial / --skip-search-serial
|
150
|
+
Search serial or skip [default: search-
|
151
|
+
serial]
|
152
|
+
--search-concurrent / --skip-search-concurrent
|
153
|
+
Search concurrent or skip [default: search-
|
154
|
+
concurrent]
|
155
|
+
--case-type [CapacityDim128|CapacityDim960|Performance768D100M|Performance768D10M|Performance768D1M|Performance768D10M1P|Performance768D1M1P|Performance768D10M99P|Performance768D1M99P|Performance1536D500K|Performance1536D5M|Performance1536D500K1P|Performance1536D5M1P|Performance1536D500K99P|Performance1536D5M99P|Performance1536D50K]
|
156
|
+
Case type
|
157
|
+
--db-label TEXT Db label, default: date in ISO format
|
158
|
+
[default: 2024-05-20T20:26:31.113290]
|
159
|
+
--dry-run Print just the configuration and exit
|
160
|
+
without running the tasks
|
161
|
+
--k INTEGER K value for number of nearest neighbors to
|
162
|
+
search [default: 100]
|
163
|
+
--concurrency-duration INTEGER Adjusts the duration in seconds of each
|
164
|
+
concurrency search [default: 30]
|
165
|
+
--num-concurrency TEXT Comma-separated list of concurrency values
|
166
|
+
to test during concurrent search [default:
|
167
|
+
1,10,20]
|
168
|
+
--user-name TEXT Db username [required]
|
169
|
+
--password TEXT Db password [required]
|
170
|
+
--host TEXT Db host [required]
|
171
|
+
--db-name TEXT Db name [required]
|
172
|
+
--maintenance-work-mem TEXT Sets the maximum memory to be used for
|
173
|
+
maintenance operations (index creation). Can
|
174
|
+
be entered as string with unit like '64GB'
|
175
|
+
or as an integer number of KB.This will set
|
176
|
+
the parameters:
|
177
|
+
max_parallel_maintenance_workers,
|
178
|
+
max_parallel_workers &
|
179
|
+
table(parallel_workers)
|
180
|
+
--max-parallel-workers INTEGER Sets the maximum number of parallel
|
181
|
+
processes per maintenance operation (index
|
182
|
+
creation)
|
183
|
+
--m INTEGER hnsw m
|
184
|
+
--ef-construction INTEGER hnsw ef-construction
|
185
|
+
--ef-search INTEGER hnsw ef-search
|
186
|
+
--help Show this message and exit.
|
187
|
+
```
|
188
|
+
#### Using a configuration file.
|
189
|
+
|
190
|
+
The vectordbbench command can optionally read some or all of its options from a YAML-formatted configuration file.
|
191
|
+
|
192
|
+
By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
|
193
|
+
the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
|
194
|
+
|
195
|
+
The required format is:
|
196
|
+
```yaml
|
197
|
+
commandname:
|
198
|
+
parameter_name: parameter_value
|
199
|
+
parameter_name: parameter_value
|
200
|
+
```
|
201
|
+
Example:
|
202
|
+
```yaml
|
203
|
+
pgvectorhnsw:
|
204
|
+
db_label: pgConfigTest
|
205
|
+
user_name: vectordbbench
|
206
|
+
password: vectordbbench
|
207
|
+
db_name: vectordbbench
|
208
|
+
host: localhost
|
209
|
+
m: 16
|
210
|
+
ef_construction: 128
|
211
|
+
ef_search: 128
|
212
|
+
milvushnsw:
|
213
|
+
skip_search_serial: True
|
214
|
+
case_type: Performance1536D50K
|
215
|
+
uri: http://localhost:19530
|
216
|
+
m: 16
|
217
|
+
ef_construction: 128
|
218
|
+
ef_search: 128
|
219
|
+
drop_old: False
|
220
|
+
load: False
|
221
|
+
```
|
222
|
+
> Notes:
|
223
|
+
> - Options passed on the command line will override the configuration file.
|
224
|
+
> - Parameter names use an underscore (`_`), not a hyphen (`-`).
|
225
|
+
|
108
226
|
## What is VectorDBBench
|
109
227
|
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
110
228
|
|
@@ -232,6 +350,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
|
232
350
|
|
233
351
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
234
352
|
|
353
|
+
#### Custom Dataset for Performance case
|
354
|
+
|
355
|
+
Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
|
356
|
+
|
357
|
+

|
358
|
+

|
359
|
+
|
360
|
+
We have strict requirements for the dataset format; please follow them.
|
361
|
+
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
362
|
+
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
363
|
+
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
364
|
+
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
365
|
+
|
366
|
+
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
367
|
+
|
368
|
+
- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
|
369
|
+
|
370
|
+
|
235
371
|
## Goals
|
236
372
|
Our goals of this benchmark are:
|
237
373
|
### Reproducibility & Usability
|
@@ -280,6 +416,7 @@ class NewDBCaseConfig(DBCaseConfig):
|
|
280
416
|
# Implement optional case-specific configuration fields
|
281
417
|
# ...
|
282
418
|
```
|
419
|
+
|
283
420
|
**Step 3: Importing the DB Client and Updating Initialization**
|
284
421
|
|
285
422
|
In this step, you will import your DB client into clients/__init__.py and update the initialization process.
|
@@ -318,6 +455,83 @@ class DB(Enum):
|
|
318
455
|
return NewClientCaseConfig
|
319
456
|
|
320
457
|
```
|
458
|
+
**Step 4: Implement new_client/cli.py and vectordb_bench/cli/vectordbbench.py**
|
459
|
+
|
460
|
+
In this (optional, but encouraged) step you will enable the test to be run from the command line.
|
461
|
+
1. Navigate to the vectordb_bench/backend/clients/"client" directory.
|
462
|
+
2. Inside the "client" folder, create a cli.py file.
|
463
|
+
Using Zilliz as an example, cli.py looks like this:
|
464
|
+
```python
|
465
|
+
from typing import Annotated, Unpack
|
466
|
+
|
467
|
+
import click
|
468
|
+
import os
|
469
|
+
from pydantic import SecretStr
|
470
|
+
|
471
|
+
from vectordb_bench.cli.cli import (
|
472
|
+
CommonTypedDict,
|
473
|
+
cli,
|
474
|
+
click_parameter_decorators_from_typed_dict,
|
475
|
+
run,
|
476
|
+
)
|
477
|
+
from vectordb_bench.backend.clients import DB
|
478
|
+
|
479
|
+
|
480
|
+
class ZillizTypedDict(CommonTypedDict):
|
481
|
+
uri: Annotated[
|
482
|
+
str, click.option("--uri", type=str, help="uri connection string", required=True)
|
483
|
+
]
|
484
|
+
user_name: Annotated[
|
485
|
+
str, click.option("--user-name", type=str, help="Db username", required=True)
|
486
|
+
]
|
487
|
+
password: Annotated[
|
488
|
+
str,
|
489
|
+
click.option("--password",
|
490
|
+
type=str,
|
491
|
+
help="Zilliz password",
|
492
|
+
default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
|
493
|
+
show_default="$ZILLIZ_PASSWORD",
|
494
|
+
),
|
495
|
+
]
|
496
|
+
level: Annotated[
|
497
|
+
str,
|
498
|
+
click.option("--level", type=str, help="Zilliz index level", required=False),
|
499
|
+
]
|
500
|
+
|
501
|
+
|
502
|
+
@cli.command()
|
503
|
+
@click_parameter_decorators_from_typed_dict(ZillizTypedDict)
|
504
|
+
def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
|
505
|
+
from .config import ZillizCloudConfig, AutoIndexConfig
|
506
|
+
|
507
|
+
run(
|
508
|
+
db=DB.ZillizCloud,
|
509
|
+
db_config=ZillizCloudConfig(
|
510
|
+
db_label=parameters["db_label"],
|
511
|
+
uri=SecretStr(parameters["uri"]),
|
512
|
+
user=parameters["user_name"],
|
513
|
+
password=SecretStr(parameters["password"]),
|
514
|
+
),
|
515
|
+
db_case_config=AutoIndexConfig(
|
516
|
+
params={parameters["level"]},
|
517
|
+
),
|
518
|
+
**parameters,
|
519
|
+
)
|
520
|
+
```
|
521
|
+
3. Update cli by adding:
|
522
|
+
1. Add database specific options as an Annotated TypedDict, see ZillizTypedDict above.
|
523
|
+
2. Add index configuration specific options as an Annotated TypedDict. (example: vectordb_bench/backend/clients/pgvector/cli.py)
|
524
|
+
1. May not be needed if there is only one index config.
|
525
|
+
2. Repeat for each index configuration, nesting them if possible.
|
526
|
+
2. Add an index-config-specific function for each index type, see Zilliz above. The function name, in lowercase, will be the command name passed to the vectordbbench command.
|
527
|
+
3. Update db_config and db_case_config to match client requirements
|
528
|
+
4. Continue to add new functions for each index config.
|
529
|
+
5. Import the client cli module and command into vectordb_bench/cli/vectordbbench.py (for databases with multiple commands, i.e. multiple index configs, this only needs to be done for one command)
|
530
|
+
|
531
|
+
> cli modules with multiple index configs:
|
532
|
+
> - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
|
533
|
+
> - milvus: vectordb_bench/backend/clients/milvus/cli.py
|
534
|
+
|
321
535
|
That's it! You have successfully added a new DB client to the vectordb_bench project.
|
322
536
|
|
323
537
|
## Rules
|
@@ -27,24 +27,134 @@ pip install vectordb-bench[pinecone]
|
|
27
27
|
```
|
28
28
|
All the supported database clients
|
29
29
|
|
30
|
-
|Optional database client|install command|
|
31
|
-
|
32
|
-
|pymilvus(*default*)
|
33
|
-
|all
|
34
|
-
|qdrant
|
35
|
-
|pinecone
|
36
|
-
|weaviate
|
37
|
-
|elastic
|
38
|
-
|pgvector
|
39
|
-
|pgvecto.rs
|
40
|
-
|redis
|
41
|
-
|chromadb
|
30
|
+
| Optional database client | install command |
|
31
|
+
|--------------------------|---------------------------------------------|
|
32
|
+
| pymilvus(*default*) | `pip install vectordb-bench` |
|
33
|
+
| all | `pip install vectordb-bench[all]` |
|
34
|
+
| qdrant | `pip install vectordb-bench[qdrant]` |
|
35
|
+
| pinecone | `pip install vectordb-bench[pinecone]` |
|
36
|
+
| weaviate | `pip install vectordb-bench[weaviate]` |
|
37
|
+
| elastic | `pip install vectordb-bench[elastic]` |
|
38
|
+
| pgvector | `pip install vectordb-bench[pgvector]` |
|
39
|
+
| pgvecto.rs | `pip install vectordb-bench[pgvecto_rs]` |
|
40
|
+
| redis | `pip install vectordb-bench[redis]` |
|
41
|
+
| chromadb | `pip install vectordb-bench[chromadb]` |
|
42
|
+
| awsopensearch | `pip install vectordb-bench[awsopensearch]` |
|
42
43
|
|
43
44
|
### Run
|
44
45
|
|
45
46
|
``` shell
|
46
47
|
init_bench
|
47
48
|
```
|
49
|
+
|
50
|
+
OR:
|
51
|
+
|
52
|
+
### Run from the command line.
|
53
|
+
|
54
|
+
``` shell
|
55
|
+
vectordbbench [OPTIONS] COMMAND [ARGS]...
|
56
|
+
```
|
57
|
+
To list the clients that are runnable via the commandline option, execute: `vectordbbench --help`
|
58
|
+
``` text
|
59
|
+
$ vectordbbench --help
|
60
|
+
Usage: vectordbbench [OPTIONS] COMMAND [ARGS]...
|
61
|
+
|
62
|
+
Options:
|
63
|
+
--help Show this message and exit.
|
64
|
+
|
65
|
+
Commands:
|
66
|
+
pgvectorhnsw
|
67
|
+
pgvectorivfflat
|
68
|
+
test
|
69
|
+
weaviate
|
70
|
+
```
|
71
|
+
To list the options for each command, execute `vectordbbench [command] --help`
|
72
|
+
|
73
|
+
```text
|
74
|
+
$ vectordbbench pgvectorhnsw --help
|
75
|
+
Usage: vectordbbench pgvectorhnsw [OPTIONS]
|
76
|
+
|
77
|
+
Options:
|
78
|
+
--config-file PATH Read configuration from yaml file
|
79
|
+
--drop-old / --skip-drop-old Drop old or skip [default: drop-old]
|
80
|
+
--load / --skip-load Load or skip [default: load]
|
81
|
+
--search-serial / --skip-search-serial
|
82
|
+
Search serial or skip [default: search-
|
83
|
+
serial]
|
84
|
+
--search-concurrent / --skip-search-concurrent
|
85
|
+
Search concurrent or skip [default: search-
|
86
|
+
concurrent]
|
87
|
+
--case-type [CapacityDim128|CapacityDim960|Performance768D100M|Performance768D10M|Performance768D1M|Performance768D10M1P|Performance768D1M1P|Performance768D10M99P|Performance768D1M99P|Performance1536D500K|Performance1536D5M|Performance1536D500K1P|Performance1536D5M1P|Performance1536D500K99P|Performance1536D5M99P|Performance1536D50K]
|
88
|
+
Case type
|
89
|
+
--db-label TEXT Db label, default: date in ISO format
|
90
|
+
[default: 2024-05-20T20:26:31.113290]
|
91
|
+
--dry-run Print just the configuration and exit
|
92
|
+
without running the tasks
|
93
|
+
--k INTEGER K value for number of nearest neighbors to
|
94
|
+
search [default: 100]
|
95
|
+
--concurrency-duration INTEGER Adjusts the duration in seconds of each
|
96
|
+
concurrency search [default: 30]
|
97
|
+
--num-concurrency TEXT Comma-separated list of concurrency values
|
98
|
+
to test during concurrent search [default:
|
99
|
+
1,10,20]
|
100
|
+
--user-name TEXT Db username [required]
|
101
|
+
--password TEXT Db password [required]
|
102
|
+
--host TEXT Db host [required]
|
103
|
+
--db-name TEXT Db name [required]
|
104
|
+
--maintenance-work-mem TEXT Sets the maximum memory to be used for
|
105
|
+
maintenance operations (index creation). Can
|
106
|
+
be entered as string with unit like '64GB'
|
107
|
+
or as an integer number of KB.This will set
|
108
|
+
the parameters:
|
109
|
+
max_parallel_maintenance_workers,
|
110
|
+
max_parallel_workers &
|
111
|
+
table(parallel_workers)
|
112
|
+
--max-parallel-workers INTEGER Sets the maximum number of parallel
|
113
|
+
processes per maintenance operation (index
|
114
|
+
creation)
|
115
|
+
--m INTEGER hnsw m
|
116
|
+
--ef-construction INTEGER hnsw ef-construction
|
117
|
+
--ef-search INTEGER hnsw ef-search
|
118
|
+
--help Show this message and exit.
|
119
|
+
```
|
120
|
+
#### Using a configuration file.
|
121
|
+
|
122
|
+
The vectordbbench command can optionally read some or all of the options from a yaml formatted configuration file.
|
123
|
+
|
124
|
+
By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
|
125
|
+
the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
|
126
|
+
|
127
|
+
The required format is:
|
128
|
+
```yaml
|
129
|
+
commandname:
|
130
|
+
parameter_name: parameter_value
|
131
|
+
parameter_name: parameter_value
|
132
|
+
```
|
133
|
+
Example:
|
134
|
+
```yaml
|
135
|
+
pgvectorhnsw:
|
136
|
+
db_label: pgConfigTest
|
137
|
+
user_name: vectordbbench
|
138
|
+
password: vectordbbench
|
139
|
+
db_name: vectordbbench
|
140
|
+
host: localhost
|
141
|
+
m: 16
|
142
|
+
ef_construction: 128
|
143
|
+
ef_search: 128
|
144
|
+
milvushnsw:
|
145
|
+
skip_search_serial: True
|
146
|
+
case_type: Performance1536D50K
|
147
|
+
uri: http://localhost:19530
|
148
|
+
m: 16
|
149
|
+
ef_construction: 128
|
150
|
+
ef_search: 128
|
151
|
+
drop_old: False
|
152
|
+
load: False
|
153
|
+
```
|
154
|
+
> Notes:
|
155
|
+
> - Options passed on the command line will override the configuration file
|
156
|
+
> - Parameter names use an _ not -
|
157
|
+
|
48
158
|
## What is VectorDBBench
|
49
159
|
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
50
160
|
|
@@ -172,6 +282,24 @@ Case No. | Case Type | Dataset Size | Filtering Rate | Results |
|
|
172
282
|
|
173
283
|
Each case provides an in-depth examination of a vector database's abilities, providing you a comprehensive view of the database's performance.
|
174
284
|
|
285
|
+
#### Custom Dataset for Performance case
|
286
|
+
|
287
|
+
Through the `/custom` page, users can customize their own performance case using local datasets. After saving, the corresponding case can be selected from the `/run_test` page to perform the test.
|
288
|
+
|
289
|
+

|
290
|
+

|
291
|
+
|
292
|
+
We have strict requirements for the dataset format; please follow them.
|
293
|
+
- `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
|
294
|
+
- Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
295
|
+
- Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
|
296
|
+
- Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
|
297
|
+
|
298
|
+
- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|
299
|
+
|
300
|
+
- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
|
301
|
+
|
302
|
+
|
175
303
|
## Goals
|
176
304
|
Our goals of this benchmark are:
|
177
305
|
### Reproducibility & Usability
|
@@ -220,6 +348,7 @@ class NewDBCaseConfig(DBCaseConfig):
|
|
220
348
|
# Implement optional case-specific configuration fields
|
221
349
|
# ...
|
222
350
|
```
|
351
|
+
|
223
352
|
**Step 3: Importing the DB Client and Updating Initialization**
|
224
353
|
|
225
354
|
In this final step, you will import your DB client into clients/__init__.py and update the initialization process.
|
@@ -258,6 +387,83 @@ class DB(Enum):
|
|
258
387
|
return NewClientCaseConfig
|
259
388
|
|
260
389
|
```
|
390
|
+
**Step 4: Implement new_client/cli.py and vectordb_bench/cli/vectordbbench.py**
|
391
|
+
|
392
|
+
In this (optional, but encouraged) step you will enable the test to be run from the command line.
|
393
|
+
1. Navigate to the vectordb_bench/backend/clients/"client" directory.
|
394
|
+
2. Inside the "client" folder, create a cli.py file.
|
395
|
+
Using zilliz as an example cli.py:
|
396
|
+
```python
|
397
|
+
from typing import Annotated, Unpack
|
398
|
+
|
399
|
+
import click
|
400
|
+
import os
|
401
|
+
from pydantic import SecretStr
|
402
|
+
|
403
|
+
from vectordb_bench.cli.cli import (
|
404
|
+
CommonTypedDict,
|
405
|
+
cli,
|
406
|
+
click_parameter_decorators_from_typed_dict,
|
407
|
+
run,
|
408
|
+
)
|
409
|
+
from vectordb_bench.backend.clients import DB
|
410
|
+
|
411
|
+
|
412
|
+
class ZillizTypedDict(CommonTypedDict):
|
413
|
+
uri: Annotated[
|
414
|
+
str, click.option("--uri", type=str, help="uri connection string", required=True)
|
415
|
+
]
|
416
|
+
user_name: Annotated[
|
417
|
+
str, click.option("--user-name", type=str, help="Db username", required=True)
|
418
|
+
]
|
419
|
+
password: Annotated[
|
420
|
+
str,
|
421
|
+
click.option("--password",
|
422
|
+
type=str,
|
423
|
+
help="Zilliz password",
|
424
|
+
default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
|
425
|
+
show_default="$ZILLIZ_PASSWORD",
|
426
|
+
),
|
427
|
+
]
|
428
|
+
level: Annotated[
|
429
|
+
str,
|
430
|
+
click.option("--level", type=str, help="Zilliz index level", required=False),
|
431
|
+
]
|
432
|
+
|
433
|
+
|
434
|
+
@cli.command()
|
435
|
+
@click_parameter_decorators_from_typed_dict(ZillizTypedDict)
|
436
|
+
def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
|
437
|
+
from .config import ZillizCloudConfig, AutoIndexConfig
|
438
|
+
|
439
|
+
run(
|
440
|
+
db=DB.ZillizCloud,
|
441
|
+
db_config=ZillizCloudConfig(
|
442
|
+
db_label=parameters["db_label"],
|
443
|
+
uri=SecretStr(parameters["uri"]),
|
444
|
+
user=parameters["user_name"],
|
445
|
+
password=SecretStr(parameters["password"]),
|
446
|
+
),
|
447
|
+
db_case_config=AutoIndexConfig(
|
448
|
+
params={parameters["level"]},
|
449
|
+
),
|
450
|
+
**parameters,
|
451
|
+
)
|
452
|
+
```
|
453
|
+
3. Update cli by adding:
|
454
|
+
1. Add database specific options as an Annotated TypedDict, see ZillizTypedDict above.
|
455
|
+
2. Add index configuration specific options as an Annotated TypedDict. (example: vectordb_bench/backend/clients/pgvector/cli.py)
|
456
|
+
1. May not be needed if there is only one index config.
|
457
|
+
2. Repeat for each index configuration, nesting them if possible.
|
458
|
+
2. Add an index config specific function for each index type; see Zilliz above. The function name, in lowercase, will be the command name passed to the vectordbbench command.
|
459
|
+
3. Update db_config and db_case_config to match client requirements
|
460
|
+
4. Continue to add new functions for each index config.
|
461
|
+
5. Import the client cli module and command into vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
|
462
|
+
|
463
|
+
> cli modules with multiple index configs:
|
464
|
+
> - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
|
465
|
+
> - milvus: vectordb_bench/backend/clients/milvus/cli.py
|
466
|
+
|
261
467
|
That's it! You have successfully added a new DB client to the vectordb_bench project.
|
262
468
|
|
263
469
|
## Rules
|
Binary file
|
Binary file
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
7
7
|
|
8
8
|
[tool.setuptools.packages.find]
|
9
9
|
where = ["."]
|
10
|
-
include = ["vectordb_bench"]
|
10
|
+
include = ["vectordb_bench", "vectordb_bench.cli"]
|
11
11
|
|
12
12
|
[project]
|
13
13
|
name = "vectordb-bench"
|
@@ -24,6 +24,7 @@ classifiers = [
|
|
24
24
|
"Operating System :: OS Independent",
|
25
25
|
]
|
26
26
|
dependencies = [
|
27
|
+
"click",
|
27
28
|
"pytz",
|
28
29
|
"streamlit-autorefresh",
|
29
30
|
"streamlit!=1.34.0",
|
@@ -60,21 +61,27 @@ all = [
|
|
60
61
|
"chromadb",
|
61
62
|
"psycopg2",
|
62
63
|
"psycopg",
|
64
|
+
"psycopg-binary",
|
65
|
+
"opensearch-dsl==2.1.0",
|
66
|
+
"opensearch-py==2.6.0",
|
63
67
|
]
|
64
68
|
|
65
69
|
qdrant = [ "qdrant-client" ]
|
66
70
|
pinecone = [ "pinecone-client" ]
|
67
71
|
weaviate = [ "weaviate-client" ]
|
68
72
|
elastic = [ "elasticsearch" ]
|
69
|
-
pgvector = [ "
|
73
|
+
pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
|
70
74
|
pgvecto_rs = [ "psycopg2" ]
|
71
75
|
redis = [ "redis" ]
|
72
76
|
chromadb = [ "chromadb" ]
|
77
|
+
awsopensearch = [ "awsopensearch" ]
|
78
|
+
zilliz_cloud = []
|
73
79
|
|
74
80
|
[project.urls]
|
75
81
|
"repository" = "https://github.com/zilliztech/VectorDBBench"
|
76
82
|
|
77
83
|
[project.scripts]
|
78
84
|
init_bench = "vectordb_bench.__main__:main"
|
85
|
+
vectordbbench = "vectordb_bench.cli.vectordbbench:cli"
|
79
86
|
|
80
87
|
[tool.setuptools_scm]
|
@@ -1,11 +1,13 @@
|
|
1
|
-
import environs
|
2
1
|
import inspect
|
3
2
|
import pathlib
|
4
|
-
from . import log_util
|
5
3
|
|
4
|
+
import environs
|
5
|
+
|
6
|
+
from . import log_util
|
6
7
|
|
7
8
|
env = environs.Env()
|
8
|
-
env.read_env(".env")
|
9
|
+
env.read_env(".env", False)
|
10
|
+
|
9
11
|
|
10
12
|
class config:
|
11
13
|
ALIYUN_OSS_URL = "assets.zilliz.com.cn/benchmark/"
|
@@ -19,9 +21,21 @@ class config:
|
|
19
21
|
|
20
22
|
DROP_OLD = env.bool("DROP_OLD", True)
|
21
23
|
USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
|
22
|
-
NUM_CONCURRENCY = [1, 5, 10, 15, 20, 25, 30, 35]
|
23
24
|
|
24
|
-
|
25
|
+
NUM_CONCURRENCY = env.list("NUM_CONCURRENCY", [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
|
26
|
+
|
27
|
+
CONCURRENCY_DURATION = 30
|
28
|
+
|
29
|
+
RESULTS_LOCAL_DIR = env.path(
|
30
|
+
"RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
|
31
|
+
)
|
32
|
+
CONFIG_LOCAL_DIR = env.path(
|
33
|
+
"CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
|
34
|
+
)
|
35
|
+
|
36
|
+
|
37
|
+
K_DEFAULT = 100 # default return top k nearest neighbors during search
|
38
|
+
CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
|
25
39
|
|
26
40
|
CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600 # 24h
|
27
41
|
LOAD_TIMEOUT_DEFAULT = 2.5 * 3600 # 2.5h
|
@@ -14,7 +14,7 @@ class Assembler:
|
|
14
14
|
def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
|
15
15
|
c_cls = task.case_config.case_id.case_cls
|
16
16
|
|
17
|
-
c = c_cls()
|
17
|
+
c = c_cls(task.case_config.custom_case)
|
18
18
|
if type(task.db_case_config) != EmptyDBCaseConfig:
|
19
19
|
task.db_case_config.metric_type = c.dataset.data.metric_type
|
20
20
|
|