vectordb-bench 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +22 -9
- vectordb_bench/backend/cases.py +32 -12
- vectordb_bench/backend/clients/__init__.py +1 -0
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/milvus/cli.py +291 -0
- vectordb_bench/backend/clients/milvus/config.py +6 -2
- vectordb_bench/backend/clients/milvus/milvus.py +16 -5
- vectordb_bench/backend/clients/pgvector/cli.py +116 -0
- vectordb_bench/backend/clients/pgvector/config.py +1 -1
- vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
- vectordb_bench/backend/clients/redis/cli.py +74 -0
- vectordb_bench/backend/clients/test/cli.py +25 -0
- vectordb_bench/backend/clients/test/config.py +18 -0
- vectordb_bench/backend/clients/test/test.py +62 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
- vectordb_bench/backend/runner/mp_runner.py +14 -3
- vectordb_bench/backend/runner/serial_runner.py +7 -3
- vectordb_bench/backend/task_runner.py +76 -26
- vectordb_bench/cli/__init__.py +0 -0
- vectordb_bench/cli/cli.py +362 -0
- vectordb_bench/cli/vectordbbench.py +20 -0
- vectordb_bench/config-files/sample_config.yml +17 -0
- vectordb_bench/frontend/components/check_results/data.py +11 -8
- vectordb_bench/frontend/components/concurrent/charts.py +82 -0
- vectordb_bench/frontend/components/run_test/dbSelector.py +7 -1
- vectordb_bench/frontend/components/run_test/submitTask.py +12 -4
- vectordb_bench/frontend/components/tables/data.py +44 -0
- vectordb_bench/frontend/const/dbCaseConfigs.py +2 -1
- vectordb_bench/frontend/pages/concurrent.py +72 -0
- vectordb_bench/frontend/pages/tables.py +24 -0
- vectordb_bench/interface.py +21 -25
- vectordb_bench/metric.py +23 -1
- vectordb_bench/models.py +45 -5
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/METADATA +193 -2
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/RECORD +40 -24
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/entry_points.txt +1 -0
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
import streamlit as st
|
2
|
+
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
3
|
+
from vectordb_bench.frontend.components.tables.data import getNewResults
|
4
|
+
from vectordb_bench.frontend.const.styles import FAVICON
|
5
|
+
|
6
|
+
|
7
|
+
def main():
|
8
|
+
# set page config
|
9
|
+
st.set_page_config(
|
10
|
+
page_title="Table",
|
11
|
+
page_icon=FAVICON,
|
12
|
+
layout="wide",
|
13
|
+
# initial_sidebar_state="collapsed",
|
14
|
+
)
|
15
|
+
|
16
|
+
# header
|
17
|
+
drawHeaderIcon(st)
|
18
|
+
|
19
|
+
df = getNewResults()
|
20
|
+
st.dataframe(df, height=800)
|
21
|
+
|
22
|
+
|
23
|
+
if __name__ == "__main__":
|
24
|
+
main()
|
vectordb_bench/interface.py
CHANGED
@@ -1,38 +1,33 @@
|
|
1
|
-
import
|
1
|
+
import concurrent.futures
|
2
|
+
import logging
|
3
|
+
import multiprocessing as mp
|
2
4
|
import pathlib
|
3
5
|
import signal
|
4
|
-
import
|
6
|
+
import traceback
|
5
7
|
import uuid
|
6
|
-
import
|
7
|
-
import multiprocessing as mp
|
8
|
+
from enum import Enum
|
8
9
|
from multiprocessing.connection import Connection
|
9
10
|
|
10
11
|
import psutil
|
11
|
-
from enum import Enum
|
12
12
|
|
13
13
|
from . import config
|
14
|
-
from .metric import Metric
|
15
|
-
from .models import (
|
16
|
-
TaskConfig,
|
17
|
-
TestResult,
|
18
|
-
CaseResult,
|
19
|
-
LoadTimeoutError,
|
20
|
-
PerformanceTimeoutError,
|
21
|
-
ResultLabel,
|
22
|
-
)
|
23
|
-
from .backend.result_collector import ResultCollector
|
24
14
|
from .backend.assembler import Assembler
|
25
|
-
from .backend.task_runner import TaskRunner
|
26
15
|
from .backend.data_source import DatasetSource
|
16
|
+
from .backend.result_collector import ResultCollector
|
17
|
+
from .backend.task_runner import TaskRunner
|
18
|
+
from .metric import Metric
|
19
|
+
from .models import (CaseResult, LoadTimeoutError, PerformanceTimeoutError,
|
20
|
+
ResultLabel, TaskConfig, TaskStage, TestResult)
|
27
21
|
|
28
22
|
log = logging.getLogger(__name__)
|
29
23
|
|
30
24
|
global_result_future: concurrent.futures.Future | None = None
|
31
25
|
|
26
|
+
|
32
27
|
class SIGNAL(Enum):
|
33
|
-
SUCCESS=0
|
34
|
-
ERROR=1
|
35
|
-
WIP=2
|
28
|
+
SUCCESS = 0
|
29
|
+
ERROR = 1
|
30
|
+
WIP = 2
|
36
31
|
|
37
32
|
|
38
33
|
class BenchMarkRunner:
|
@@ -42,9 +37,11 @@ class BenchMarkRunner:
|
|
42
37
|
self.drop_old: bool = True
|
43
38
|
self.dataset_source: DatasetSource = DatasetSource.S3
|
44
39
|
|
40
|
+
|
45
41
|
def set_drop_old(self, drop_old: bool):
|
46
42
|
self.drop_old = drop_old
|
47
43
|
|
44
|
+
|
48
45
|
def set_download_address(self, use_aliyun: bool):
|
49
46
|
if use_aliyun:
|
50
47
|
self.dataset_source = DatasetSource.AliyunOSS
|
@@ -152,13 +149,13 @@ class BenchMarkRunner:
|
|
152
149
|
latest_runner, cached_load_duration = None, None
|
153
150
|
for idx, runner in enumerate(running_task.case_runners):
|
154
151
|
case_res = CaseResult(
|
155
|
-
result_id=idx,
|
156
152
|
metrics=Metric(),
|
157
153
|
task_config=runner.config,
|
158
154
|
)
|
159
155
|
|
160
156
|
# drop_old = False if latest_runner and runner == latest_runner else config.DROP_OLD
|
161
|
-
drop_old = config.DROP_OLD
|
157
|
+
# drop_old = config.DROP_OLD
|
158
|
+
drop_old = TaskStage.DROP_OLD in runner.config.stages
|
162
159
|
if latest_runner and runner == latest_runner:
|
163
160
|
drop_old = False
|
164
161
|
elif not self.drop_old:
|
@@ -167,7 +164,7 @@ class BenchMarkRunner:
|
|
167
164
|
log.info(f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}")
|
168
165
|
case_res.metrics = runner.run(drop_old)
|
169
166
|
log.info(f"[{idx+1}/{running_task.num_cases()}] finish case: {runner.display()}, "
|
170
|
-
|
167
|
+
f"result={case_res.metrics}, label={case_res.label}")
|
171
168
|
|
172
169
|
# cache the latest succeeded runner
|
173
170
|
latest_runner = runner
|
@@ -193,7 +190,6 @@ class BenchMarkRunner:
|
|
193
190
|
c_results.append(case_res)
|
194
191
|
send_conn.send((SIGNAL.WIP, idx))
|
195
192
|
|
196
|
-
|
197
193
|
test_result = TestResult(
|
198
194
|
run_id=running_task.run_id,
|
199
195
|
task_label=running_task.task_label,
|
@@ -204,7 +200,7 @@ class BenchMarkRunner:
|
|
204
200
|
|
205
201
|
send_conn.send((SIGNAL.SUCCESS, None))
|
206
202
|
send_conn.close()
|
207
|
-
log.info(f"
|
203
|
+
log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
|
208
204
|
|
209
205
|
except Exception as e:
|
210
206
|
err_msg = f"An error occurs when running task={running_task.task_label}, run_id={running_task.run_id}, err={e}"
|
@@ -246,7 +242,7 @@ class BenchMarkRunner:
|
|
246
242
|
called as soon as a child terminates.
|
247
243
|
"""
|
248
244
|
children = psutil.Process().children(recursive=True)
|
249
|
-
for p in
|
245
|
+
for p in children:
|
250
246
|
try:
|
251
247
|
log.warning(f"sending SIGTERM to child process: {p}")
|
252
248
|
p.send_signal(sig)
|
vectordb_bench/metric.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
import numpy as np
|
3
3
|
|
4
|
-
from dataclasses import dataclass
|
4
|
+
from dataclasses import dataclass, field
|
5
5
|
|
6
6
|
|
7
7
|
log = logging.getLogger(__name__)
|
@@ -19,6 +19,10 @@ class Metric:
|
|
19
19
|
qps: float = 0.0
|
20
20
|
serial_latency_p99: float = 0.0
|
21
21
|
recall: float = 0.0
|
22
|
+
ndcg: float = 0.0
|
23
|
+
conc_num_list: list[int] = field(default_factory=list)
|
24
|
+
conc_qps_list: list[float] = field(default_factory=list)
|
25
|
+
conc_latency_p99_list: list[float] = field(default_factory=list)
|
22
26
|
|
23
27
|
|
24
28
|
QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
|
@@ -60,3 +64,21 @@ def calc_recall(count: int, ground_truth: list[int], got: list[int]) -> float:
|
|
60
64
|
recalls[i] = 1
|
61
65
|
|
62
66
|
return np.mean(recalls)
|
67
|
+
|
68
|
+
|
69
|
+
def get_ideal_dcg(k: int):
|
70
|
+
ideal_dcg = 0
|
71
|
+
for i in range(k):
|
72
|
+
ideal_dcg += 1 / np.log2(i+2)
|
73
|
+
|
74
|
+
return ideal_dcg
|
75
|
+
|
76
|
+
|
77
|
+
def calc_ndcg(ground_truth: list[int], got: list[int], ideal_dcg: float) -> float:
|
78
|
+
dcg = 0
|
79
|
+
ground_truth = list(ground_truth)
|
80
|
+
for id in set(got):
|
81
|
+
if id in ground_truth:
|
82
|
+
idx = ground_truth.index(id)
|
83
|
+
dcg += 1 / np.log2(idx+2)
|
84
|
+
return dcg / ideal_dcg
|
vectordb_bench/models.py
CHANGED
@@ -17,7 +17,6 @@ from .base import BaseModel
|
|
17
17
|
from . import config
|
18
18
|
from .metric import Metric
|
19
19
|
|
20
|
-
|
21
20
|
log = logging.getLogger(__name__)
|
22
21
|
|
23
22
|
|
@@ -65,15 +64,55 @@ class CaseConfigParamType(Enum):
|
|
65
64
|
maintenance_work_mem = "maintenance_work_mem"
|
66
65
|
max_parallel_workers = "max_parallel_workers"
|
67
66
|
|
67
|
+
|
68
68
|
class CustomizedCase(BaseModel):
|
69
69
|
pass
|
70
70
|
|
71
71
|
|
72
|
+
class ConcurrencySearchConfig(BaseModel):
|
73
|
+
num_concurrency: List[int] = config.NUM_CONCURRENCY
|
74
|
+
concurrency_duration: int = config.CONCURRENCY_DURATION
|
75
|
+
|
76
|
+
|
72
77
|
class CaseConfig(BaseModel):
|
73
78
|
"""cases, dataset, test cases, filter rate, params"""
|
74
79
|
|
75
80
|
case_id: CaseType
|
76
81
|
custom_case: dict | None = None
|
82
|
+
k: int | None = config.K_DEFAULT
|
83
|
+
concurrency_search_config: ConcurrencySearchConfig = ConcurrencySearchConfig()
|
84
|
+
|
85
|
+
'''
|
86
|
+
@property
|
87
|
+
def k(self):
|
88
|
+
"""K search parameter, default is config.K_DEFAULT"""
|
89
|
+
return self._k
|
90
|
+
|
91
|
+
#
|
92
|
+
@k.setter
|
93
|
+
def k(self, value):
|
94
|
+
self._k = value
|
95
|
+
'''
|
96
|
+
|
97
|
+
class TaskStage(StrEnum):
|
98
|
+
"""Enumerations of various stages of the task"""
|
99
|
+
|
100
|
+
DROP_OLD = auto()
|
101
|
+
LOAD = auto()
|
102
|
+
SEARCH_SERIAL = auto()
|
103
|
+
SEARCH_CONCURRENT = auto()
|
104
|
+
|
105
|
+
def __repr__(self) -> str:
|
106
|
+
return str.__repr__(self.value)
|
107
|
+
|
108
|
+
|
109
|
+
# TODO: Add CapacityCase enums and adjust TaskRunner to utilize
|
110
|
+
ALL_TASK_STAGES = [
|
111
|
+
TaskStage.DROP_OLD,
|
112
|
+
TaskStage.LOAD,
|
113
|
+
TaskStage.SEARCH_SERIAL,
|
114
|
+
TaskStage.SEARCH_CONCURRENT,
|
115
|
+
]
|
77
116
|
|
78
117
|
|
79
118
|
class TaskConfig(BaseModel):
|
@@ -81,6 +120,7 @@ class TaskConfig(BaseModel):
|
|
81
120
|
db_config: DBConfig
|
82
121
|
db_case_config: DBCaseConfig
|
83
122
|
case_config: CaseConfig
|
123
|
+
stages: List[TaskStage] = ALL_TASK_STAGES
|
84
124
|
|
85
125
|
@property
|
86
126
|
def db_name(self):
|
@@ -210,18 +250,18 @@ class TestResult(BaseModel):
|
|
210
250
|
|
211
251
|
max_db = max(map(len, [f.task_config.db.name for f in filtered_results]))
|
212
252
|
max_db_labels = (
|
213
|
-
|
214
|
-
|
253
|
+
max(map(len, [f.task_config.db_config.db_label for f in filtered_results]))
|
254
|
+
+ 3
|
215
255
|
)
|
216
256
|
max_case = max(
|
217
257
|
map(len, [f.task_config.case_config.case_id.name for f in filtered_results])
|
218
258
|
)
|
219
259
|
max_load_dur = (
|
220
|
-
|
260
|
+
max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
|
221
261
|
)
|
222
262
|
max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
|
223
263
|
max_recall = (
|
224
|
-
|
264
|
+
max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
|
225
265
|
)
|
226
266
|
|
227
267
|
max_db_labels = 8 if max_db_labels < 8 else max_db_labels
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.11
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -10,6 +10,7 @@ Classifier: Operating System :: OS Independent
|
|
10
10
|
Requires-Python: >=3.11
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
|
+
Requires-Dist: click
|
13
14
|
Requires-Dist: pytz
|
14
15
|
Requires-Dist: streamlit-autorefresh
|
15
16
|
Requires-Dist: streamlit !=1.34.0
|
@@ -37,6 +38,7 @@ Requires-Dist: redis ; extra == 'all'
|
|
37
38
|
Requires-Dist: chromadb ; extra == 'all'
|
38
39
|
Requires-Dist: psycopg2 ; extra == 'all'
|
39
40
|
Requires-Dist: psycopg ; extra == 'all'
|
41
|
+
Requires-Dist: psycopg-binary ; extra == 'all'
|
40
42
|
Provides-Extra: chromadb
|
41
43
|
Requires-Dist: chromadb ; extra == 'chromadb'
|
42
44
|
Provides-Extra: elastic
|
@@ -44,8 +46,9 @@ Requires-Dist: elasticsearch ; extra == 'elastic'
|
|
44
46
|
Provides-Extra: pgvecto_rs
|
45
47
|
Requires-Dist: psycopg2 ; extra == 'pgvecto_rs'
|
46
48
|
Provides-Extra: pgvector
|
47
|
-
Requires-Dist: pgvector ; extra == 'pgvector'
|
48
49
|
Requires-Dist: psycopg ; extra == 'pgvector'
|
50
|
+
Requires-Dist: psycopg-binary ; extra == 'pgvector'
|
51
|
+
Requires-Dist: pgvector ; extra == 'pgvector'
|
49
52
|
Provides-Extra: pinecone
|
50
53
|
Requires-Dist: pinecone-client ; extra == 'pinecone'
|
51
54
|
Provides-Extra: qdrant
|
@@ -57,6 +60,7 @@ Requires-Dist: ruff ; extra == 'test'
|
|
57
60
|
Requires-Dist: pytest ; extra == 'test'
|
58
61
|
Provides-Extra: weaviate
|
59
62
|
Requires-Dist: weaviate-client ; extra == 'weaviate'
|
63
|
+
Provides-Extra: zilliz_cloud
|
60
64
|
|
61
65
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
62
66
|
|
@@ -105,6 +109,115 @@ All the database client supported
|
|
105
109
|
``` shell
|
106
110
|
init_bench
|
107
111
|
```
|
112
|
+
|
113
|
+
OR:
|
114
|
+
|
115
|
+
### Run from the command line.
|
116
|
+
|
117
|
+
``` shell
|
118
|
+
vectordbbench [OPTIONS] COMMAND [ARGS]...
|
119
|
+
```
|
120
|
+
To list the clients that can be run from the command line, execute: `vectordbbench --help`
|
121
|
+
``` text
|
122
|
+
$ vectordbbench --help
|
123
|
+
Usage: vectordbbench [OPTIONS] COMMAND [ARGS]...
|
124
|
+
|
125
|
+
Options:
|
126
|
+
--help Show this message and exit.
|
127
|
+
|
128
|
+
Commands:
|
129
|
+
pgvectorhnsw
|
130
|
+
pgvectorivfflat
|
131
|
+
test
|
132
|
+
weaviate
|
133
|
+
```
|
134
|
+
To list the options for each command, execute `vectordbbench [command] --help`
|
135
|
+
|
136
|
+
```text
|
137
|
+
$ vectordbbench pgvectorhnsw --help
|
138
|
+
Usage: vectordbbench pgvectorhnsw [OPTIONS]
|
139
|
+
|
140
|
+
Options:
|
141
|
+
--config-file PATH Read configuration from yaml file
|
142
|
+
--drop-old / --skip-drop-old Drop old or skip [default: drop-old]
|
143
|
+
--load / --skip-load Load or skip [default: load]
|
144
|
+
--search-serial / --skip-search-serial
|
145
|
+
Search serial or skip [default: search-
|
146
|
+
serial]
|
147
|
+
--search-concurrent / --skip-search-concurrent
|
148
|
+
Search concurrent or skip [default: search-
|
149
|
+
concurrent]
|
150
|
+
--case-type [CapacityDim128|CapacityDim960|Performance768D100M|Performance768D10M|Performance768D1M|Performance768D10M1P|Performance768D1M1P|Performance768D10M99P|Performance768D1M99P|Performance1536D500K|Performance1536D5M|Performance1536D500K1P|Performance1536D5M1P|Performance1536D500K99P|Performance1536D5M99P|Performance1536D50K]
|
151
|
+
Case type
|
152
|
+
--db-label TEXT Db label, default: date in ISO format
|
153
|
+
[default: 2024-05-20T20:26:31.113290]
|
154
|
+
--dry-run Print just the configuration and exit
|
155
|
+
without running the tasks
|
156
|
+
--k INTEGER K value for number of nearest neighbors to
|
157
|
+
search [default: 100]
|
158
|
+
--concurrency-duration INTEGER Adjusts the duration in seconds of each
|
159
|
+
concurrency search [default: 30]
|
160
|
+
--num-concurrency TEXT Comma-separated list of concurrency values
|
161
|
+
to test during concurrent search [default:
|
162
|
+
1,10,20]
|
163
|
+
--user-name TEXT Db username [required]
|
164
|
+
--password TEXT Db password [required]
|
165
|
+
--host TEXT Db host [required]
|
166
|
+
--db-name TEXT Db name [required]
|
167
|
+
--maintenance-work-mem TEXT Sets the maximum memory to be used for
|
168
|
+
maintenance operations (index creation). Can
|
169
|
+
be entered as string with unit like '64GB'
|
170
|
+
or as an integer number of KB.This will set
|
171
|
+
the parameters:
|
172
|
+
max_parallel_maintenance_workers,
|
173
|
+
max_parallel_workers &
|
174
|
+
table(parallel_workers)
|
175
|
+
--max-parallel-workers INTEGER Sets the maximum number of parallel
|
176
|
+
processes per maintenance operation (index
|
177
|
+
creation)
|
178
|
+
--m INTEGER hnsw m
|
179
|
+
--ef-construction INTEGER hnsw ef-construction
|
180
|
+
--ef-search INTEGER hnsw ef-search
|
181
|
+
--help Show this message and exit.
|
182
|
+
```
|
183
|
+
#### Using a configuration file.
|
184
|
+
|
185
|
+
The vectordbbench command can optionally read some or all of the options from a YAML-formatted configuration file.
|
186
|
+
|
187
|
+
By default, configuration files are expected to be in vectordb_bench/config-files/; this can be overridden by setting
|
188
|
+
the environment variable CONFIG_LOCAL_DIR or by passing the full path to the file.
|
189
|
+
|
190
|
+
The required format is:
|
191
|
+
```yaml
|
192
|
+
commandname:
|
193
|
+
parameter_name: parameter_value
|
194
|
+
parameter_name: parameter_value
|
195
|
+
```
|
196
|
+
Example:
|
197
|
+
```yaml
|
198
|
+
pgvectorhnsw:
|
199
|
+
db_label: pgConfigTest
|
200
|
+
user_name: vectordbbench
|
201
|
+
password: vectordbbench
|
202
|
+
db_name: vectordbbench
|
203
|
+
host: localhost
|
204
|
+
m: 16
|
205
|
+
ef_construction: 128
|
206
|
+
ef_search: 128
|
207
|
+
milvushnsw:
|
208
|
+
skip_search_serial: True
|
209
|
+
case_type: Performance1536D50K
|
210
|
+
uri: http://localhost:19530
|
211
|
+
m: 16
|
212
|
+
ef_construction: 128
|
213
|
+
ef_search: 128
|
214
|
+
drop_old: False
|
215
|
+
load: False
|
216
|
+
```
|
217
|
+
> Notes:
|
218
|
+
> - Options passed on the command line will override the configuration file
|
219
|
+
> - Parameter names use an underscore (`_`), not a hyphen (`-`)
|
220
|
+
|
108
221
|
## What is VectorDBBench
|
109
222
|
VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
110
223
|
|
@@ -280,6 +393,7 @@ class NewDBCaseConfig(DBCaseConfig):
|
|
280
393
|
# Implement optional case-specific configuration fields
|
281
394
|
# ...
|
282
395
|
```
|
396
|
+
|
283
397
|
**Step 3: Importing the DB Client and Updating Initialization**
|
284
398
|
|
285
399
|
In this final step, you will import your DB client into clients/__init__.py and update the initialization process.
|
@@ -318,6 +432,83 @@ class DB(Enum):
|
|
318
432
|
return NewClientCaseConfig
|
319
433
|
|
320
434
|
```
|
435
|
+
**Step 4: Implement new_client/cli.py and vectordb_bench/cli/vectordbbench.py**
|
436
|
+
|
437
|
+
In this (optional, but encouraged) step you will enable the test to be run from the command line.
|
438
|
+
1. Navigate to the vectordb_bench/backend/clients/"client" directory.
|
439
|
+
2. Inside the "client" folder, create a cli.py file.
|
440
|
+
Using zilliz as an example cli.py:
|
441
|
+
```python
|
442
|
+
from typing import Annotated, Unpack
|
443
|
+
|
444
|
+
import click
|
445
|
+
import os
|
446
|
+
from pydantic import SecretStr
|
447
|
+
|
448
|
+
from vectordb_bench.cli.cli import (
|
449
|
+
CommonTypedDict,
|
450
|
+
cli,
|
451
|
+
click_parameter_decorators_from_typed_dict,
|
452
|
+
run,
|
453
|
+
)
|
454
|
+
from vectordb_bench.backend.clients import DB
|
455
|
+
|
456
|
+
|
457
|
+
class ZillizTypedDict(CommonTypedDict):
|
458
|
+
uri: Annotated[
|
459
|
+
str, click.option("--uri", type=str, help="uri connection string", required=True)
|
460
|
+
]
|
461
|
+
user_name: Annotated[
|
462
|
+
str, click.option("--user-name", type=str, help="Db username", required=True)
|
463
|
+
]
|
464
|
+
password: Annotated[
|
465
|
+
str,
|
466
|
+
click.option("--password",
|
467
|
+
type=str,
|
468
|
+
help="Zilliz password",
|
469
|
+
default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
|
470
|
+
show_default="$ZILLIZ_PASSWORD",
|
471
|
+
),
|
472
|
+
]
|
473
|
+
level: Annotated[
|
474
|
+
str,
|
475
|
+
click.option("--level", type=str, help="Zilliz index level", required=False),
|
476
|
+
]
|
477
|
+
|
478
|
+
|
479
|
+
@cli.command()
|
480
|
+
@click_parameter_decorators_from_typed_dict(ZillizTypedDict)
|
481
|
+
def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
|
482
|
+
from .config import ZillizCloudConfig, AutoIndexConfig
|
483
|
+
|
484
|
+
run(
|
485
|
+
db=DB.ZillizCloud,
|
486
|
+
db_config=ZillizCloudConfig(
|
487
|
+
db_label=parameters["db_label"],
|
488
|
+
uri=SecretStr(parameters["uri"]),
|
489
|
+
user=parameters["user_name"],
|
490
|
+
password=SecretStr(parameters["password"]),
|
491
|
+
),
|
492
|
+
db_case_config=AutoIndexConfig(
|
493
|
+
params={parameters["level"]},
|
494
|
+
),
|
495
|
+
**parameters,
|
496
|
+
)
|
497
|
+
```
|
498
|
+
3. Update cli by adding:
|
499
|
+
1. Add database specific options as an Annotated TypedDict, see ZillizTypedDict above.
|
500
|
+
2. Add index configuration specific options as an Annotated TypedDict. (example: vectordb_bench/backend/clients/pgvector/cli.py)
|
501
|
+
1. May not be needed if there is only one index config.
|
502
|
+
2. Repeat for each index configuration, nesting them if possible.
|
503
|
+
2. Add an index-config-specific function for each index type (see Zilliz above). The function name, in lowercase, becomes the command name passed to the vectordbbench command.
|
504
|
+
3. Update db_config and db_case_config to match client requirements
|
505
|
+
4. Continue to add new functions for each index config.
|
506
|
+
5. Import the client cli module and command into vectordb_bench/cli/vectordbbench.py. For databases with multiple commands (index configs), this only needs to be done for one command.
|
507
|
+
|
508
|
+
> cli modules with multiple index configs:
|
509
|
+
> - pgvector: vectordb_bench/backend/clients/pgvector/cli.py
|
510
|
+
> - milvus: vectordb_bench/backend/clients/milvus/cli.py
|
511
|
+
|
321
512
|
That's it! You have successfully added a new DB client to the vectordb_bench project.
|
322
513
|
|
323
514
|
## Rules
|
@@ -1,47 +1,59 @@
|
|
1
|
-
vectordb_bench/__init__.py,sha256=
|
1
|
+
vectordb_bench/__init__.py,sha256=cvCI4khbLpcbxYMCwRsvG4THZa4GayqQ9aE1iXbFuUU,2052
|
2
2
|
vectordb_bench/__main__.py,sha256=YJOTn5MlbmLyr3PRsecY6fj7igHLB6_D3y1HwF_sO20,848
|
3
3
|
vectordb_bench/base.py,sha256=d34WCGXZI1u5RGQtqrPHd3HbOF5AmioFrM2j30Aj1sY,130
|
4
|
-
vectordb_bench/interface.py,sha256=
|
4
|
+
vectordb_bench/interface.py,sha256=ZT3pseyq--TuxtopdP2hRut-6vIInKo62pvAl2zBD10,9708
|
5
5
|
vectordb_bench/log_util.py,sha256=nMnW-sN24WyURcI07t-WA3q2N5R-YIvFgboRsSrNJDg,2906
|
6
|
-
vectordb_bench/metric.py,sha256
|
7
|
-
vectordb_bench/models.py,sha256=
|
6
|
+
vectordb_bench/metric.py,sha256=osb58NvGGmqs3EKTfFujO7Qq5fAhGO9AOkCsziKgEUs,1976
|
7
|
+
vectordb_bench/models.py,sha256=ye39851RviGwZJ-CZ0INjKDjAIuc0uX2maYPSXfbVWI,9744
|
8
8
|
vectordb_bench/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
9
|
vectordb_bench/backend/assembler.py,sha256=W03o8xaOoa5CTBr5586nIzm4sEJ4a-85sfcEG-d60VY,2000
|
10
|
-
vectordb_bench/backend/cases.py,sha256=
|
10
|
+
vectordb_bench/backend/cases.py,sha256=879vcSRzyoLko1tdtxQDM15Id8vQGpu9e-t_e6m7bi4,14518
|
11
11
|
vectordb_bench/backend/data_source.py,sha256=j4-eD0nIe7Y6fSM5WKEij3GfhyU_YOQ3L5Tyl-1GxX0,5446
|
12
12
|
vectordb_bench/backend/dataset.py,sha256=E-ZdYNXwCN3Fa4b_9rvhbiJgPLiKXQmi_fqZk0r7AHk,8295
|
13
13
|
vectordb_bench/backend/result_collector.py,sha256=jdQf5-q1z5y07SKy9Sig1wFROmm-p9x_Y81fId0sjaU,807
|
14
|
-
vectordb_bench/backend/task_runner.py,sha256=
|
14
|
+
vectordb_bench/backend/task_runner.py,sha256=Y1HYWvWlIo_4pe0EMLuAN8bWj2xhqfbw59afbCmZeAI,11855
|
15
15
|
vectordb_bench/backend/utils.py,sha256=2UixYyfKvl8zRiashywB1l6hTI3jMtiZhiVm_bXHV1Y,1811
|
16
|
-
vectordb_bench/backend/clients/__init__.py,sha256=
|
17
|
-
vectordb_bench/backend/clients/api.py,sha256=
|
16
|
+
vectordb_bench/backend/clients/__init__.py,sha256=M7RT9-Px4iwEYnBGeo3tywk6fRlfmFWynZxuymenXVY,4600
|
17
|
+
vectordb_bench/backend/clients/api.py,sha256=YQlvABf8KfXcDpJzFglQKmFN3zmJuFKG4ihabxYIrDU,5634
|
18
18
|
vectordb_bench/backend/clients/chroma/chroma.py,sha256=Rg-GVWSDLdw32XfltJQlS3JHtNX1BJYDHxTSy086tKA,3739
|
19
19
|
vectordb_bench/backend/clients/chroma/config.py,sha256=7Tp_di0cdBsh4kX-IijTLsmFK2JJpcrXP2K6e24OUGc,345
|
20
20
|
vectordb_bench/backend/clients/elastic_cloud/config.py,sha256=xkaBNtsayByelVLda8LiSEwxjQjESpijJ8IFOh03f_0,1598
|
21
21
|
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py,sha256=rWHthqGEpYwwka-0bsjyWfCwTAsYKNPvB17qe0Z1VDQ,5709
|
22
|
-
vectordb_bench/backend/clients/milvus/
|
23
|
-
vectordb_bench/backend/clients/milvus/
|
22
|
+
vectordb_bench/backend/clients/milvus/cli.py,sha256=QqzYIOeUSXEvdLH0_YUMhwDHUDJirTNKeUxrJQIqSdw,8506
|
23
|
+
vectordb_bench/backend/clients/milvus/config.py,sha256=AZ4QHoufRIjsX2eVrtnug8SeYnuHeBMna_34OQNFxz0,6847
|
24
|
+
vectordb_bench/backend/clients/milvus/milvus.py,sha256=BzOySmlYCQnNScazK9XBjKPh3X99jZSm0W3-IigRAYY,7653
|
24
25
|
vectordb_bench/backend/clients/pgvecto_rs/config.py,sha256=scdEXN6RT4yGA5j8fXSAooAvB550WQQ1JnN7SBQCUZM,3648
|
25
26
|
vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py,sha256=Od9g1wIhgslXBavEwCV8-LYsclqOJB3OwpeU6ZA265k,6195
|
26
|
-
vectordb_bench/backend/clients/pgvector/
|
27
|
-
vectordb_bench/backend/clients/pgvector/
|
27
|
+
vectordb_bench/backend/clients/pgvector/cli.py,sha256=4fDweywfb57dzf0HzQuNk_2Xutjo_XKi91mHIuYOBQM,3582
|
28
|
+
vectordb_bench/backend/clients/pgvector/config.py,sha256=jbSPXd2SiFTwuRzEzN_c7oShtb2Fz-hy2VM5lI-bIGw,7202
|
29
|
+
vectordb_bench/backend/clients/pgvector/pgvector.py,sha256=FtCvhjAr8kYnLLyBLHy3jLuMYH14dSJo3zUt6_mT6T0,12500
|
28
30
|
vectordb_bench/backend/clients/pinecone/config.py,sha256=4WvMu-9zxgoGfP5GPb7hpW-PRYEORADhlQvMa8JJh8k,384
|
29
31
|
vectordb_bench/backend/clients/pinecone/pinecone.py,sha256=U31QbXLuTcNPp7PK24glE6LM23-YpbxK_Kj-NmEwoZY,4078
|
30
32
|
vectordb_bench/backend/clients/qdrant_cloud/config.py,sha256=jk6gLcjZnjV0kQlc4RrrcXyekF6qkwzgWOYD3Mm8AOU,1385
|
31
33
|
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py,sha256=lgMPwP37A-NQWHtxL58qojsUcL68EtrH_0Xk_hrs7OY,5224
|
34
|
+
vectordb_bench/backend/clients/redis/cli.py,sha256=xlbXcQhLyZbsFvt9XhgGehAw4a3BFblnkUt4rBCUvgk,1991
|
32
35
|
vectordb_bench/backend/clients/redis/config.py,sha256=8DBtsMRjuAJ0Vy5wsrKOkt35xcRFA8L-xTYoErk0Bzc,351
|
33
36
|
vectordb_bench/backend/clients/redis/redis.py,sha256=35g9KPL8_zGeo7eYdYRaabq5dlkDXwMQ9TD8YsVser0,5976
|
37
|
+
vectordb_bench/backend/clients/test/cli.py,sha256=8TByn_5OhO_rMyJV1vdRmKYbtsrXXyFK1hh3Ii2QOhw,552
|
38
|
+
vectordb_bench/backend/clients/test/config.py,sha256=1olqOWmkcucyd3EMMSkCT-0lbPyf4v5RHfTsGMuWSvc,412
|
39
|
+
vectordb_bench/backend/clients/test/test.py,sha256=b9-LOih079Lj_CMcZyno1vpKaU_Fooi8lpY3rF-3BsU,1479
|
40
|
+
vectordb_bench/backend/clients/weaviate_cloud/cli.py,sha256=i8evyzTPFEgnevYYe46ZOjpFOYd3IpZ5Q-wVeuh_vgk,1060
|
34
41
|
vectordb_bench/backend/clients/weaviate_cloud/config.py,sha256=jFhZ9cwMRoLxppR4udboSc4EPO-bnv5A5ITR9akevHE,1247
|
35
42
|
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py,sha256=QWXXSE02iZOOHy2fcBjJPYwGP5d4qnFvh9ha_2FAQZI,4991
|
43
|
+
vectordb_bench/backend/clients/zilliz_cloud/cli.py,sha256=V8XnjrM4IOexqJksQCBgEYyYfQJPXouKeTthEAVRlYU,1569
|
36
44
|
vectordb_bench/backend/clients/zilliz_cloud/config.py,sha256=3Tk7X4r0n2SLzan110xlF63otVGjCKe28CVDfCEI04c,910
|
37
45
|
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py,sha256=4JcwiVEJcdEykW6n471nfHeIlmhIDa-gOZ7G5H_4krY,681
|
38
46
|
vectordb_bench/backend/runner/__init__.py,sha256=5dZfPky8pY9Bi9HD5GZ3Fge8V2FJWrkGkQUkNL2v1t0,230
|
39
|
-
vectordb_bench/backend/runner/mp_runner.py,sha256=
|
40
|
-
vectordb_bench/backend/runner/serial_runner.py,sha256=
|
47
|
+
vectordb_bench/backend/runner/mp_runner.py,sha256=FWhCU6y97cxbJSSBfHWcif7t4ew6SOmP3d94C1vKvfg,5406
|
48
|
+
vectordb_bench/backend/runner/serial_runner.py,sha256=ku1Dtps9JcmwCwZq7eDw0pcP9IN2Zjjg-1VJumXYJpA,9414
|
49
|
+
vectordb_bench/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
+
vectordb_bench/cli/cli.py,sha256=M6LE3hqmnt7QYrhm2KU2-kFQRpRp86GwkKXHc-XLeHo,10850
|
51
|
+
vectordb_bench/cli/vectordbbench.py,sha256=4DUMN2awCaHt2gRQHz3yCU19Y0hpABPrHPmIVwpfozg,554
|
52
|
+
vectordb_bench/config-files/sample_config.yml,sha256=yw9ZgHczNi9PedNuTVxZKiOTI6AVoQS1h8INNgoDjPk,340
|
41
53
|
vectordb_bench/frontend/utils.py,sha256=BzKR1kMX1ErlXAzkFUb06O2mIcxBbquRzJtxoHgRnKs,162
|
42
54
|
vectordb_bench/frontend/vdb_benchmark.py,sha256=z9VCsMp2Ra43J-oFXRsX98Ww3Xq_koUpZ9PEZoU5qws,1649
|
43
55
|
vectordb_bench/frontend/components/check_results/charts.py,sha256=zbtEyUSk2FJbSlaGw1LH4boSljFXfhVZlC8rrIgVw_4,5113
|
44
|
-
vectordb_bench/frontend/components/check_results/data.py,sha256=
|
56
|
+
vectordb_bench/frontend/components/check_results/data.py,sha256=HFqjYp2UitWcPF1LMka3xCLh2DBnU-EohFlxjUs-RlQ,3282
|
45
57
|
vectordb_bench/frontend/components/check_results/expanderStyle.py,sha256=dETG2-smcWntQTARUXzbSv90Py5yN74mGXoBGn4loow,1295
|
46
58
|
vectordb_bench/frontend/components/check_results/filters.py,sha256=41KXtVyC1BMGRXh3OoHodMXNw3eZLQHSzNZ91eviIk4,4225
|
47
59
|
vectordb_bench/frontend/components/check_results/footer.py,sha256=Nh1RzorDg-8R5ewp_UGFnUqWaAEZ7xZ1RpqHDew1mGY,395
|
@@ -49,19 +61,23 @@ vectordb_bench/frontend/components/check_results/headerIcon.py,sha256=Q0mbnav2FW
|
|
49
61
|
vectordb_bench/frontend/components/check_results/nav.py,sha256=DQl74rujw70ayh37PQaiO4AdtVZ95-OtTMEtw_Ui7hE,685
|
50
62
|
vectordb_bench/frontend/components/check_results/priceTable.py,sha256=E7sxhSCjkBOMlQFHe6zFizhQLsJ-mUcXUbNj4FpqSUE,1308
|
51
63
|
vectordb_bench/frontend/components/check_results/stPageConfig.py,sha256=rAL2prWx0hT7Q3QWz6ALyKUMNladX6U48GlKvVq3DFA,429
|
64
|
+
vectordb_bench/frontend/components/concurrent/charts.py,sha256=EKVpQuJXVb8ju-F_B6yvEUXDmFOcPoSyCDKkqUg7814,2416
|
52
65
|
vectordb_bench/frontend/components/get_results/saveAsImage.py,sha256=MdQCqjrX5rQyK34XfTkVykVLOcOouIz4enMR1P5GBiY,1457
|
53
66
|
vectordb_bench/frontend/components/run_test/autoRefresh.py,sha256=ofsl2sdmBd2y9O_xaJDr58NPycJsDwCdf2rEyE_f6e8,288
|
54
67
|
vectordb_bench/frontend/components/run_test/caseSelector.py,sha256=B1rtbSDlzPHbdPK52mxFf3FbF4qYs1J9YNYdSnTxCRg,3945
|
55
68
|
vectordb_bench/frontend/components/run_test/dbConfigSetting.py,sha256=hoelDzXP-J2EmzvgGh6Euk7uBfu9iw0YGM7lxjo0cb8,2074
|
56
|
-
vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=
|
69
|
+
vectordb_bench/frontend/components/run_test/dbSelector.py,sha256=GU_Fr3tk5xJASVQdgBAlwuoy55RoVIM5nT-sjDEe4mY,1452
|
57
70
|
vectordb_bench/frontend/components/run_test/generateTasks.py,sha256=9r1vb03FMSJ_vG4px_wHMKMB_RWaKv6ttv0FptsytgA,812
|
58
71
|
vectordb_bench/frontend/components/run_test/hideSidebar.py,sha256=vb5kzIMmbMqWX67qFEHek21X4sGO_tPyn_uPqUEtp3Q,234
|
59
|
-
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=
|
60
|
-
vectordb_bench/frontend/
|
72
|
+
vectordb_bench/frontend/components/run_test/submitTask.py,sha256=xte8qcIWYra9Yg8iC5sGIDt24hu_jPH3_SWzDeTcrKk,3340
|
73
|
+
vectordb_bench/frontend/components/tables/data.py,sha256=pVG_hb4bTMLfUt10NUCJSqcFkPmnN7i9jTw9DcWizpI,1364
|
74
|
+
vectordb_bench/frontend/const/dbCaseConfigs.py,sha256=JP2Wyguss9o4stDgWaZwO2Cr-RhKhG9kDKZ09JSR2I8,18395
|
61
75
|
vectordb_bench/frontend/const/dbPrices.py,sha256=10aBKjVcEg8y7TPSda28opmBM1KmXNrvbU9WM_BsZcE,176
|
62
76
|
vectordb_bench/frontend/const/styles.py,sha256=B2ycRJ6CHBzew8B2P7z9nnzY-20W5SlggXLI2olfxac,2174
|
77
|
+
vectordb_bench/frontend/pages/concurrent.py,sha256=lfCBoNNJalPi9vLfs90xp6cCIgiJegqGoHwAwfhwxE8,2290
|
63
78
|
vectordb_bench/frontend/pages/quries_per_dollar.py,sha256=SpXwKwdarwPz7RtF_qxyODfwARBb3VI9iKElYtnwEVs,2422
|
64
79
|
vectordb_bench/frontend/pages/run_test.py,sha256=nkXTVBauuCgkJ6WRSQ_-qa83RAmc0Z3VH2uTDrNCQL8,2045
|
80
|
+
vectordb_bench/frontend/pages/tables.py,sha256=Vm16SdaJ29YB_P8ofO5JEz--3sNBKrB13yFkxehZ_lU,564
|
65
81
|
vectordb_bench/results/dbPrices.json,sha256=VoaOjqbWyTdEMLXuzerL5xR46QbxOWFmxCf2mPhjJV4,576
|
66
82
|
vectordb_bench/results/getLeaderboardData.py,sha256=viQ4iSDwI5BM0eNGy9p2kEuXsmGhW3oaUo0eK9F087s,1760
|
67
83
|
vectordb_bench/results/leaderboard.json,sha256=N1yPKoiz2GJ8SrGvhV7WDDlBgTxtJ1GpQwq4Kec5ExA,66580
|
@@ -80,9 +96,9 @@ vectordb_bench/results/WeaviateCloud/result_20230808_standard_weaviatecloud.json
|
|
80
96
|
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json,sha256=wzrlCEsqaoy4EujDNeLebCKZIC__aXNe2NhFDEdewKo,17398
|
81
97
|
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json,sha256=G44g4aTJfeC0FyqosPEtaC-iy8JUX-bVpnA6dn0iiYU,14969
|
82
98
|
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json,sha256=5R5PGJheoCOksx9uOXeSu8Z24Zc6Xp9LUkgJ-OzGAtM,41007
|
83
|
-
vectordb_bench-0.0.
|
84
|
-
vectordb_bench-0.0.
|
85
|
-
vectordb_bench-0.0.
|
86
|
-
vectordb_bench-0.0.
|
87
|
-
vectordb_bench-0.0.
|
88
|
-
vectordb_bench-0.0.
|
99
|
+
vectordb_bench-0.0.11.dist-info/LICENSE,sha256=HXbxhrb5u5SegVzeLNF_voVgRsJMavcLaOmD1N0lZkM,1067
|
100
|
+
vectordb_bench-0.0.11.dist-info/METADATA,sha256=YqAAFU7oDb5YOqiCwHcbWt7YSsDfrvzPkhIEv4RATjQ,30175
|
101
|
+
vectordb_bench-0.0.11.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
102
|
+
vectordb_bench-0.0.11.dist-info/entry_points.txt,sha256=Qzw6gVx96ui8esG21H6yHsI6nboEohRmV424TYhQNrA,113
|
103
|
+
vectordb_bench-0.0.11.dist-info/top_level.txt,sha256=jnhZFZAuKX1J60yt-XOeBZ__ctiZMvoC_s0RFq29lpM,15
|
104
|
+
vectordb_bench-0.0.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|