vectordb-bench 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +14 -13
- vectordb_bench/backend/clients/__init__.py +13 -0
- vectordb_bench/backend/clients/api.py +2 -0
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
- vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
- vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
- vectordb_bench/backend/clients/pgdiskann/cli.py +99 -0
- vectordb_bench/backend/clients/pgdiskann/config.py +145 -0
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +350 -0
- vectordb_bench/backend/clients/pgvector/cli.py +62 -1
- vectordb_bench/backend/clients/pgvector/config.py +48 -10
- vectordb_bench/backend/clients/pgvector/pgvector.py +145 -26
- vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +22 -4
- vectordb_bench/backend/clients/pinecone/config.py +0 -2
- vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
- vectordb_bench/backend/clients/redis/cli.py +8 -0
- vectordb_bench/backend/clients/redis/config.py +37 -6
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
- vectordb_bench/backend/runner/mp_runner.py +2 -1
- vectordb_bench/cli/cli.py +137 -0
- vectordb_bench/cli/vectordbbench.py +4 -1
- vectordb_bench/frontend/components/check_results/charts.py +9 -6
- vectordb_bench/frontend/components/concurrent/charts.py +3 -6
- vectordb_bench/frontend/components/run_test/caseSelector.py +6 -0
- vectordb_bench/frontend/config/dbCaseConfigs.py +165 -1
- vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
- vectordb_bench/frontend/vdb_benchmark.py +11 -3
- vectordb_bench/models.py +13 -3
- vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
- vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
- vectordb_bench/results/getLeaderboardData.py +17 -7
- vectordb_bench/results/leaderboard.json +1 -1
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/METADATA +65 -35
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/RECORD +42 -38
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/top_level.txt +0 -0
vectordb_bench/cli/cli.py
CHANGED
@@ -17,6 +17,8 @@ from typing import (
|
|
17
17
|
Any,
|
18
18
|
)
|
19
19
|
import click
|
20
|
+
|
21
|
+
from vectordb_bench.backend.clients.api import MetricType
|
20
22
|
from .. import config
|
21
23
|
from ..backend.clients import DB
|
22
24
|
from ..interface import benchMarkRunner, global_result_future
|
@@ -147,6 +149,37 @@ def parse_task_stages(
|
|
147
149
|
return stages
|
148
150
|
|
149
151
|
|
152
|
+
def check_custom_case_parameters(ctx, param, value):
    """Click option callback enforcing the custom-dataset options.

    When ``ctx.params`` already holds ``case_type ==
    "PerformanceCustomDataset"`` and this option was given no value, a
    ``click.BadParameter`` listing the required custom options is raised.
    In every other situation the value is passed through unchanged.

    NOTE(review): this depends on ``--case-type`` having been processed
    before the option this callback is attached to (the option is declared
    with ``is_eager=True``) -- confirm if option ordering changes.

    Args:
        ctx: Click context; only ``ctx.params`` is read.
        param: The option being validated (not used here).
        value: The parsed value of that option.

    Returns:
        ``value``, unchanged.

    Raises:
        click.BadParameter: a required custom-case option has no value.
    """
    if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
        # Adjacent literals instead of backslash continuations inside one
        # literal: a continuation keeps whatever whitespace surrounds the
        # line break, which previously left stray spaces in the message.
        raise click.BadParameter(
            "Custom case parameters"
            "\n--custom-case-name"
            "\n--custom-dataset-name"
            "\n--custom-dataset-dir"
            "\n--custom-dataset-size"
            "\n--custom-dataset-dim"
            "\n--custom-dataset-file-count"
            "\nare required"
        )
    return value
|
159
|
+
|
160
|
+
|
161
|
+
def get_custom_case_config(parameters: dict) -> dict:
    """Build the custom-case configuration from parsed CLI parameters.

    Args:
        parameters: Mapping of CLI parameter names to values. The
            ``custom_*`` keys are only read when ``case_type`` is
            ``"PerformanceCustomDataset"``.

    Returns:
        An empty dict when ``case_type`` is absent or is anything other
        than ``"PerformanceCustomDataset"``; otherwise a dict with the case
        ``name``, ``description``, ``load_timeout``, ``optimize_timeout``
        and a nested ``dataset_config`` dict.

    Raises:
        KeyError: the case is a custom dataset case but one of the
            ``custom_*`` keys is missing from ``parameters``.
    """
    # ``.get`` mirrors check_custom_case_parameters: a missing ``case_type``
    # means "not a custom case" rather than an error.
    if parameters.get("case_type") != "PerformanceCustomDataset":
        return {}

    dataset_config = {
        "name": parameters["custom_dataset_name"],
        "dir": parameters["custom_dataset_dir"],
        "size": parameters["custom_dataset_size"],
        "dim": parameters["custom_dataset_dim"],
        "metric_type": parameters["custom_dataset_metric_type"],
        "file_count": parameters["custom_dataset_file_count"],
        "use_shuffled": parameters["custom_dataset_use_shuffled"],
        "with_gt": parameters["custom_dataset_with_gt"],
    }
    return {
        "name": parameters["custom_case_name"],
        "description": parameters["custom_case_description"],
        "load_timeout": parameters["custom_case_load_timeout"],
        "optimize_timeout": parameters["custom_case_optimize_timeout"],
        "dataset_config": dataset_config,
    }
|
181
|
+
|
182
|
+
|
150
183
|
log = logging.getLogger(__name__)
|
151
184
|
|
152
185
|
|
@@ -205,6 +238,7 @@ class CommonTypedDict(TypedDict):
|
|
205
238
|
click.option(
|
206
239
|
"--case-type",
|
207
240
|
type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
|
241
|
+
is_eager=True,
|
208
242
|
default="Performance1536D50K",
|
209
243
|
help="Case type",
|
210
244
|
),
|
@@ -258,6 +292,108 @@ class CommonTypedDict(TypedDict):
|
|
258
292
|
callback=lambda *args: list(map(int, click_arg_split(*args))),
|
259
293
|
),
|
260
294
|
]
|
295
|
+
custom_case_name: Annotated[
|
296
|
+
str,
|
297
|
+
click.option(
|
298
|
+
"--custom-case-name",
|
299
|
+
help="Custom dataset case name",
|
300
|
+
callback=check_custom_case_parameters,
|
301
|
+
)
|
302
|
+
]
|
303
|
+
custom_case_description: Annotated[
|
304
|
+
str,
|
305
|
+
click.option(
|
306
|
+
"--custom-case-description",
|
307
|
+
help="Custom dataset case description",
|
308
|
+
default="This is a customized dataset.",
|
309
|
+
show_default=True,
|
310
|
+
)
|
311
|
+
]
|
312
|
+
custom_case_load_timeout: Annotated[
|
313
|
+
int,
|
314
|
+
click.option(
|
315
|
+
"--custom-case-load-timeout",
|
316
|
+
help="Custom dataset case load timeout",
|
317
|
+
default=36000,
|
318
|
+
show_default=True,
|
319
|
+
)
|
320
|
+
]
|
321
|
+
custom_case_optimize_timeout: Annotated[
|
322
|
+
int,
|
323
|
+
click.option(
|
324
|
+
"--custom-case-optimize-timeout",
|
325
|
+
help="Custom dataset case optimize timeout",
|
326
|
+
default=36000,
|
327
|
+
show_default=True,
|
328
|
+
)
|
329
|
+
]
|
330
|
+
custom_dataset_name: Annotated[
|
331
|
+
str,
|
332
|
+
click.option(
|
333
|
+
"--custom-dataset-name",
|
334
|
+
help="Custom dataset name",
|
335
|
+
callback=check_custom_case_parameters,
|
336
|
+
),
|
337
|
+
]
|
338
|
+
custom_dataset_dir: Annotated[
|
339
|
+
str,
|
340
|
+
click.option(
|
341
|
+
"--custom-dataset-dir",
|
342
|
+
help="Custom dataset directory",
|
343
|
+
callback=check_custom_case_parameters,
|
344
|
+
),
|
345
|
+
]
|
346
|
+
custom_dataset_size: Annotated[
|
347
|
+
int,
|
348
|
+
click.option(
|
349
|
+
"--custom-dataset-size",
|
350
|
+
help="Custom dataset size",
|
351
|
+
callback=check_custom_case_parameters,
|
352
|
+
),
|
353
|
+
]
|
354
|
+
custom_dataset_dim: Annotated[
|
355
|
+
int,
|
356
|
+
click.option(
|
357
|
+
"--custom-dataset-dim",
|
358
|
+
help="Custom dataset dimension",
|
359
|
+
callback=check_custom_case_parameters,
|
360
|
+
),
|
361
|
+
]
|
362
|
+
custom_dataset_metric_type: Annotated[
|
363
|
+
str,
|
364
|
+
click.option(
|
365
|
+
"--custom-dataset-metric-type",
|
366
|
+
help="Custom dataset metric type",
|
367
|
+
default=MetricType.COSINE.name,
|
368
|
+
show_default=True,
|
369
|
+
),
|
370
|
+
]
|
371
|
+
custom_dataset_file_count: Annotated[
|
372
|
+
int,
|
373
|
+
click.option(
|
374
|
+
"--custom-dataset-file-count",
|
375
|
+
help="Custom dataset file count",
|
376
|
+
callback=check_custom_case_parameters,
|
377
|
+
),
|
378
|
+
]
|
379
|
+
custom_dataset_use_shuffled: Annotated[
|
380
|
+
bool,
|
381
|
+
click.option(
|
382
|
+
"--custom-dataset-use-shuffled/--skip-custom-dataset-use-shuffled",
|
383
|
+
help="Custom dataset use shuffled",
|
384
|
+
default=False,
|
385
|
+
show_default=True,
|
386
|
+
),
|
387
|
+
]
|
388
|
+
custom_dataset_with_gt: Annotated[
|
389
|
+
bool,
|
390
|
+
click.option(
|
391
|
+
"--custom-dataset-with-gt/--skip-custom-dataset-with-gt",
|
392
|
+
help="Custom dataset with ground truth",
|
393
|
+
default=True,
|
394
|
+
show_default=True,
|
395
|
+
),
|
396
|
+
]
|
261
397
|
|
262
398
|
|
263
399
|
class HNSWBaseTypedDict(TypedDict):
|
@@ -343,6 +479,7 @@ def run(
|
|
343
479
|
concurrency_duration=parameters["concurrency_duration"],
|
344
480
|
num_concurrency=[int(s) for s in parameters["num_concurrency"]],
|
345
481
|
),
|
482
|
+
custom_case=parameters.get("custom_case", {}),
|
346
483
|
),
|
347
484
|
stages=parse_task_stages(
|
348
485
|
(
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from ..backend.clients.pgvector.cli import PgVectorHNSW
|
2
2
|
from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
|
3
|
+
from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
|
4
|
+
from ..backend.clients.pgdiskann.cli import PgDiskAnn
|
3
5
|
from ..backend.clients.redis.cli import Redis
|
4
6
|
from ..backend.clients.memorydb.cli import MemoryDB
|
5
7
|
from ..backend.clients.test.cli import Test
|
@@ -8,7 +10,6 @@ from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
|
|
8
10
|
from ..backend.clients.milvus.cli import MilvusAutoIndex
|
9
11
|
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
|
10
12
|
|
11
|
-
|
12
13
|
from .cli import cli
|
13
14
|
|
14
15
|
cli.add_command(PgVectorHNSW)
|
@@ -21,6 +22,8 @@ cli.add_command(Test)
|
|
21
22
|
cli.add_command(ZillizAutoIndex)
|
22
23
|
cli.add_command(MilvusAutoIndex)
|
23
24
|
cli.add_command(AWSOpenSearch)
|
25
|
+
cli.add_command(PgVectorScaleDiskAnn)
|
26
|
+
cli.add_command(PgDiskAnn)
|
24
27
|
|
25
28
|
|
26
29
|
if __name__ == "__main__":
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from vectordb_bench.backend.cases import Case
|
2
|
-
from vectordb_bench.frontend.components.check_results.expanderStyle import
|
2
|
+
from vectordb_bench.frontend.components.check_results.expanderStyle import (
|
3
|
+
initMainExpanderStyle,
|
4
|
+
)
|
3
5
|
from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
|
4
6
|
from vectordb_bench.frontend.config.styles import *
|
5
7
|
from vectordb_bench.models import ResultLabel
|
@@ -11,7 +13,7 @@ def drawCharts(st, allData, failedTasks, caseNames: list[str]):
|
|
11
13
|
for caseName in caseNames:
|
12
14
|
chartContainer = st.expander(caseName, True)
|
13
15
|
data = [data for data in allData if data["case_name"] == caseName]
|
14
|
-
drawChart(data, chartContainer)
|
16
|
+
drawChart(data, chartContainer, key_prefix=caseName)
|
15
17
|
|
16
18
|
errorDBs = failedTasks[caseName]
|
17
19
|
showFailedDBs(chartContainer, errorDBs)
|
@@ -35,7 +37,7 @@ def showFailedText(st, text, dbs):
|
|
35
37
|
)
|
36
38
|
|
37
39
|
|
38
|
-
def drawChart(data, st):
|
40
|
+
def drawChart(data, st, key_prefix: str):
|
39
41
|
metricsSet = set()
|
40
42
|
for d in data:
|
41
43
|
metricsSet = metricsSet.union(d["metricsSet"])
|
@@ -43,7 +45,8 @@ def drawChart(data, st):
|
|
43
45
|
|
44
46
|
for i, metric in enumerate(showMetrics):
|
45
47
|
container = st.container()
|
46
|
-
|
48
|
+
key = f"{key_prefix}-{metric}"
|
49
|
+
drawMetricChart(data, metric, container, key=key)
|
47
50
|
|
48
51
|
|
49
52
|
def getLabelToShapeMap(data):
|
@@ -75,7 +78,7 @@ def getLabelToShapeMap(data):
|
|
75
78
|
return labelToShapeMap
|
76
79
|
|
77
80
|
|
78
|
-
def drawMetricChart(data, metric, st):
|
81
|
+
def drawMetricChart(data, metric, st, key: str):
|
79
82
|
dataWithMetric = [d for d in data if d.get(metric, 0) > 1e-7]
|
80
83
|
# dataWithMetric = data
|
81
84
|
if len(dataWithMetric) == 0:
|
@@ -161,4 +164,4 @@ def drawMetricChart(data, metric, st):
|
|
161
164
|
),
|
162
165
|
)
|
163
166
|
|
164
|
-
chart.plotly_chart(fig, use_container_width=True)
|
167
|
+
chart.plotly_chart(fig, use_container_width=True, key=key)
|
@@ -22,7 +22,7 @@ def drawChartsByCase(allData, showCaseNames: list[str], st):
|
|
22
22
|
for caseData in caseDataList
|
23
23
|
for i in range(len(caseData["conc_num_list"]))
|
24
24
|
]
|
25
|
-
drawChart(data, chartContainer)
|
25
|
+
drawChart(data, chartContainer, key=f"{caseName}-qps-p99")
|
26
26
|
|
27
27
|
|
28
28
|
def getRange(metric, data, padding_multipliers):
|
@@ -36,7 +36,7 @@ def getRange(metric, data, padding_multipliers):
|
|
36
36
|
return rangeV
|
37
37
|
|
38
38
|
|
39
|
-
def drawChart(data, st):
|
39
|
+
def drawChart(data, st, key: str):
|
40
40
|
if len(data) == 0:
|
41
41
|
return
|
42
42
|
|
@@ -73,7 +73,4 @@ def drawChart(data, st):
|
|
73
73
|
fig.update_yaxes(range=yrange, title_text="QPS")
|
74
74
|
fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")
|
75
75
|
|
76
|
-
st.plotly_chart(
|
77
|
-
fig,
|
78
|
-
use_container_width=True,
|
79
|
-
)
|
76
|
+
st.plotly_chart(fig, use_container_width=True, key=key)
|
@@ -110,6 +110,12 @@ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, active
|
|
110
110
|
value=config.inputConfig["value"],
|
111
111
|
help=config.inputHelp,
|
112
112
|
)
|
113
|
+
elif config.inputType == InputType.Bool:
|
114
|
+
caseConfig[config.label] = column.checkbox(
|
115
|
+
config.displayLabel if config.displayLabel else config.label.value,
|
116
|
+
value=config.inputConfig["value"],
|
117
|
+
help=config.inputHelp,
|
118
|
+
)
|
113
119
|
k += 1
|
114
120
|
if k == 0:
|
115
121
|
columns[1].write("Auto")
|
@@ -3,7 +3,7 @@ import typing
|
|
3
3
|
from pydantic import BaseModel
|
4
4
|
from vectordb_bench.backend.cases import CaseLabel, CaseType
|
5
5
|
from vectordb_bench.backend.clients import DB
|
6
|
-
from vectordb_bench.backend.clients.api import IndexType
|
6
|
+
from vectordb_bench.backend.clients.api import IndexType, MetricType
|
7
7
|
from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
|
8
8
|
|
9
9
|
from vectordb_bench.models import CaseConfig, CaseConfigParamType
|
@@ -149,6 +149,7 @@ class InputType(IntEnum):
|
|
149
149
|
Number = 20002
|
150
150
|
Option = 20003
|
151
151
|
Float = 20004
|
152
|
+
Bool = 20005
|
152
153
|
|
153
154
|
|
154
155
|
class CaseConfigInput(BaseModel):
|
@@ -180,6 +181,16 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
|
|
180
181
|
},
|
181
182
|
)
|
182
183
|
|
184
|
+
CaseConfigParamInput_IndexType_PgDiskANN = CaseConfigInput(
|
185
|
+
label=CaseConfigParamType.IndexType,
|
186
|
+
inputHelp="Select Index Type",
|
187
|
+
inputType=InputType.Option,
|
188
|
+
inputConfig={
|
189
|
+
"options": [
|
190
|
+
IndexType.DISKANN.value,
|
191
|
+
],
|
192
|
+
},
|
193
|
+
)
|
183
194
|
|
184
195
|
CaseConfigParamInput_IndexType_PgVectorScale = CaseConfigInput(
|
185
196
|
label=CaseConfigParamType.IndexType,
|
@@ -205,6 +216,42 @@ CaseConfigParamInput_storage_layout = CaseConfigInput(
|
|
205
216
|
},
|
206
217
|
)
|
207
218
|
|
219
|
+
CaseConfigParamInput_max_neighbors = CaseConfigInput(
|
220
|
+
label=CaseConfigParamType.max_neighbors,
|
221
|
+
inputType=InputType.Number,
|
222
|
+
inputConfig={
|
223
|
+
"min": 10,
|
224
|
+
"max": 300,
|
225
|
+
"value": 32,
|
226
|
+
},
|
227
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
228
|
+
== IndexType.DISKANN.value,
|
229
|
+
)
|
230
|
+
|
231
|
+
CaseConfigParamInput_l_value_ib = CaseConfigInput(
|
232
|
+
label=CaseConfigParamType.l_value_ib,
|
233
|
+
inputType=InputType.Number,
|
234
|
+
inputConfig={
|
235
|
+
"min": 10,
|
236
|
+
"max": 300,
|
237
|
+
"value": 50,
|
238
|
+
},
|
239
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
240
|
+
== IndexType.DISKANN.value,
|
241
|
+
)
|
242
|
+
|
243
|
+
CaseConfigParamInput_l_value_is = CaseConfigInput(
|
244
|
+
label=CaseConfigParamType.l_value_is,
|
245
|
+
inputType=InputType.Number,
|
246
|
+
inputConfig={
|
247
|
+
"min": 10,
|
248
|
+
"max": 300,
|
249
|
+
"value": 40,
|
250
|
+
},
|
251
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
252
|
+
== IndexType.DISKANN.value,
|
253
|
+
)
|
254
|
+
|
208
255
|
CaseConfigParamInput_num_neighbors = CaseConfigInput(
|
209
256
|
label=CaseConfigParamType.num_neighbors,
|
210
257
|
inputType=InputType.Number,
|
@@ -360,6 +407,37 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
|
|
360
407
|
},
|
361
408
|
)
|
362
409
|
|
410
|
+
CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
|
411
|
+
label=CaseConfigParamType.EFConstruction,
|
412
|
+
inputType=InputType.Number,
|
413
|
+
inputConfig={
|
414
|
+
"min": 100,
|
415
|
+
"max": 1024,
|
416
|
+
"value": 256,
|
417
|
+
},
|
418
|
+
)
|
419
|
+
|
420
|
+
CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
|
421
|
+
label=CaseConfigParamType.M,
|
422
|
+
inputType=InputType.Number,
|
423
|
+
inputConfig={
|
424
|
+
"min": 4,
|
425
|
+
"max": 64,
|
426
|
+
"value": 16,
|
427
|
+
},
|
428
|
+
)
|
429
|
+
|
430
|
+
CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
|
431
|
+
label=CaseConfigParamType.ef_search,
|
432
|
+
inputType=InputType.Number,
|
433
|
+
inputConfig={
|
434
|
+
"min": 100,
|
435
|
+
"max": 1024,
|
436
|
+
"value": 256,
|
437
|
+
},
|
438
|
+
)
|
439
|
+
|
440
|
+
|
363
441
|
CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
|
364
442
|
label=CaseConfigParamType.maintenance_work_mem,
|
365
443
|
inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
|
@@ -738,6 +816,19 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
|
|
738
816
|
],
|
739
817
|
)
|
740
818
|
|
819
|
+
CaseConfigParamInput_QuantizationType_PgVector = CaseConfigInput(
|
820
|
+
label=CaseConfigParamType.quantizationType,
|
821
|
+
inputType=InputType.Option,
|
822
|
+
inputConfig={
|
823
|
+
"options": ["none", "bit", "halfvec"],
|
824
|
+
},
|
825
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
826
|
+
in [
|
827
|
+
IndexType.HNSW.value,
|
828
|
+
IndexType.IVFFlat.value,
|
829
|
+
],
|
830
|
+
)
|
831
|
+
|
741
832
|
CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
742
833
|
label=CaseConfigParamType.quantizationRatio,
|
743
834
|
inputType=InputType.Option,
|
@@ -775,6 +866,46 @@ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
|
|
775
866
|
},
|
776
867
|
)
|
777
868
|
|
869
|
+
# Checkbox input for the ``reranking`` case parameter; unchecked by default
# and only displayed when the selected quantization type is "bit".
CaseConfigParamInput_reranking_PgVector = CaseConfigInput(
    label=CaseConfigParamType.reranking,
    inputType=InputType.Bool,
    displayLabel="Enable Reranking",
    # Adjacent literals rather than a backslash continuation inside the
    # string, so indentation of the second line cannot leak into the help.
    inputHelp=(
        "Enable if you want to use reranking while performing "
        "similarity search in binary quantization"
    ),
    inputConfig={
        "value": False,
    },
    isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
    == "bit",
)
|
881
|
+
|
882
|
+
CaseConfigParamInput_quantized_fetch_limit_PgVector = CaseConfigInput(
|
883
|
+
label=CaseConfigParamType.quantizedFetchLimit,
|
884
|
+
displayLabel="Quantized vector fetch limit",
|
885
|
+
inputHelp="Limit top-k vectors using the quantized vector comparison --bound by ef_search",
|
886
|
+
inputType=InputType.Number,
|
887
|
+
inputConfig={
|
888
|
+
"min": 20,
|
889
|
+
"max": 1000,
|
890
|
+
"value": 200,
|
891
|
+
},
|
892
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
|
893
|
+
== "bit" and config.get(CaseConfigParamType.reranking, False)
|
894
|
+
)
|
895
|
+
|
896
|
+
|
897
|
+
CaseConfigParamInput_reranking_metric_PgVector = CaseConfigInput(
|
898
|
+
label=CaseConfigParamType.rerankingMetric,
|
899
|
+
inputType=InputType.Option,
|
900
|
+
inputConfig={
|
901
|
+
"options": [
|
902
|
+
metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD"]
|
903
|
+
],
|
904
|
+
},
|
905
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
|
906
|
+
== "bit" and config.get(CaseConfigParamType.reranking, False)
|
907
|
+
)
|
908
|
+
|
778
909
|
MilvusLoadConfig = [
|
779
910
|
CaseConfigParamInput_IndexType,
|
780
911
|
CaseConfigParamInput_M,
|
@@ -826,11 +957,19 @@ ESPerformanceConfig = [
|
|
826
957
|
CaseConfigParamInput_NumCandidates_ES,
|
827
958
|
]
|
828
959
|
|
960
|
+
AWSOpensearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch]
|
961
|
+
AWSOpenSearchPerformanceConfig = [
|
962
|
+
CaseConfigParamInput_EFConstruction_AWSOpensearch,
|
963
|
+
CaseConfigParamInput_M_AWSOpensearch,
|
964
|
+
CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
|
965
|
+
]
|
966
|
+
|
829
967
|
PgVectorLoadingConfig = [
|
830
968
|
CaseConfigParamInput_IndexType_PgVector,
|
831
969
|
CaseConfigParamInput_Lists_PgVector,
|
832
970
|
CaseConfigParamInput_m,
|
833
971
|
CaseConfigParamInput_EFConstruction_PgVector,
|
972
|
+
CaseConfigParamInput_QuantizationType_PgVector,
|
834
973
|
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
835
974
|
CaseConfigParamInput_max_parallel_workers_PgVector,
|
836
975
|
]
|
@@ -841,8 +980,12 @@ PgVectorPerformanceConfig = [
|
|
841
980
|
CaseConfigParamInput_EFSearch_PgVector,
|
842
981
|
CaseConfigParamInput_Lists_PgVector,
|
843
982
|
CaseConfigParamInput_Probes_PgVector,
|
983
|
+
CaseConfigParamInput_QuantizationType_PgVector,
|
844
984
|
CaseConfigParamInput_maintenance_work_mem_PgVector,
|
845
985
|
CaseConfigParamInput_max_parallel_workers_PgVector,
|
986
|
+
CaseConfigParamInput_reranking_PgVector,
|
987
|
+
CaseConfigParamInput_reranking_metric_PgVector,
|
988
|
+
CaseConfigParamInput_quantized_fetch_limit_PgVector,
|
846
989
|
]
|
847
990
|
|
848
991
|
PgVectoRSLoadingConfig = [
|
@@ -889,6 +1032,19 @@ PgVectorScalePerformanceConfig = [
|
|
889
1032
|
CaseConfigParamInput_query_search_list_size,
|
890
1033
|
]
|
891
1034
|
|
1035
|
+
PgDiskANNLoadConfig = [
|
1036
|
+
CaseConfigParamInput_IndexType_PgDiskANN,
|
1037
|
+
CaseConfigParamInput_max_neighbors,
|
1038
|
+
CaseConfigParamInput_l_value_ib,
|
1039
|
+
]
|
1040
|
+
|
1041
|
+
PgDiskANNPerformanceConfig = [
|
1042
|
+
CaseConfigParamInput_IndexType_PgDiskANN,
|
1043
|
+
CaseConfigParamInput_max_neighbors,
|
1044
|
+
CaseConfigParamInput_l_value_ib,
|
1045
|
+
CaseConfigParamInput_l_value_is,
|
1046
|
+
]
|
1047
|
+
|
892
1048
|
CASE_CONFIG_MAP = {
|
893
1049
|
DB.Milvus: {
|
894
1050
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -905,6 +1061,10 @@ CASE_CONFIG_MAP = {
|
|
905
1061
|
CaseLabel.Load: ESLoadingConfig,
|
906
1062
|
CaseLabel.Performance: ESPerformanceConfig,
|
907
1063
|
},
|
1064
|
+
DB.AWSOpenSearch: {
|
1065
|
+
CaseLabel.Load: AWSOpensearchLoadingConfig,
|
1066
|
+
CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
|
1067
|
+
},
|
908
1068
|
DB.PgVector: {
|
909
1069
|
CaseLabel.Load: PgVectorLoadingConfig,
|
910
1070
|
CaseLabel.Performance: PgVectorPerformanceConfig,
|
@@ -917,4 +1077,8 @@ CASE_CONFIG_MAP = {
|
|
917
1077
|
CaseLabel.Load: PgVectorScaleLoadingConfig,
|
918
1078
|
CaseLabel.Performance: PgVectorScalePerformanceConfig,
|
919
1079
|
},
|
1080
|
+
DB.PgDiskANN: {
|
1081
|
+
CaseLabel.Load: PgDiskANNLoadConfig,
|
1082
|
+
CaseLabel.Performance: PgDiskANNPerformanceConfig,
|
1083
|
+
},
|
920
1084
|
}
|
@@ -1,10 +1,17 @@
|
|
1
1
|
import streamlit as st
|
2
2
|
from vectordb_bench.frontend.components.check_results.footer import footer
|
3
|
-
from vectordb_bench.frontend.components.check_results.expanderStyle import
|
3
|
+
from vectordb_bench.frontend.components.check_results.expanderStyle import (
|
4
|
+
initMainExpanderStyle,
|
5
|
+
)
|
4
6
|
from vectordb_bench.frontend.components.check_results.priceTable import priceTable
|
5
|
-
from vectordb_bench.frontend.components.check_results.stPageConfig import
|
7
|
+
from vectordb_bench.frontend.components.check_results.stPageConfig import (
|
8
|
+
initResultsPageConfig,
|
9
|
+
)
|
6
10
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
7
|
-
from vectordb_bench.frontend.components.check_results.nav import
|
11
|
+
from vectordb_bench.frontend.components.check_results.nav import (
|
12
|
+
NavToResults,
|
13
|
+
NavToRunTest,
|
14
|
+
)
|
8
15
|
from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
|
9
16
|
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
10
17
|
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
@@ -16,7 +23,7 @@ from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
|
|
16
23
|
def main():
|
17
24
|
# set page config
|
18
25
|
initResultsPageConfig(st)
|
19
|
-
|
26
|
+
|
20
27
|
# header
|
21
28
|
drawHeaderIcon(st)
|
22
29
|
|
@@ -57,7 +64,8 @@ def main():
|
|
57
64
|
dataWithMetric.append(d)
|
58
65
|
if len(dataWithMetric) > 0:
|
59
66
|
chartContainer = st.expander(caseName, True)
|
60
|
-
|
67
|
+
key = f"{caseName}-{metric}"
|
68
|
+
drawMetricChart(data, metric, chartContainer, key=key)
|
61
69
|
|
62
70
|
# footer
|
63
71
|
footer(st.container())
|
@@ -1,8 +1,13 @@
|
|
1
1
|
import streamlit as st
|
2
2
|
from vectordb_bench.frontend.components.check_results.footer import footer
|
3
|
-
from vectordb_bench.frontend.components.check_results.stPageConfig import
|
3
|
+
from vectordb_bench.frontend.components.check_results.stPageConfig import (
|
4
|
+
initResultsPageConfig,
|
5
|
+
)
|
4
6
|
from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
|
5
|
-
from vectordb_bench.frontend.components.check_results.nav import
|
7
|
+
from vectordb_bench.frontend.components.check_results.nav import (
|
8
|
+
NavToQuriesPerDollar,
|
9
|
+
NavToRunTest,
|
10
|
+
)
|
6
11
|
from vectordb_bench.frontend.components.check_results.charts import drawCharts
|
7
12
|
from vectordb_bench.frontend.components.check_results.filters import getshownData
|
8
13
|
from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
|
@@ -20,7 +25,10 @@ def main():
|
|
20
25
|
allResults = benchMarkRunner.get_results()
|
21
26
|
|
22
27
|
st.title("Vector Database Benchmark")
|
23
|
-
st.caption(
|
28
|
+
st.caption(
|
29
|
+
"Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
|
30
|
+
)
|
31
|
+
st.caption("All tested milvus are in _standalone_ mode.")
|
24
32
|
|
25
33
|
# results selector and filter
|
26
34
|
resultSelectorContainer = st.sidebar.container()
|
vectordb_bench/models.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
import pathlib
|
3
|
-
from datetime import date
|
3
|
+
from datetime import date, datetime
|
4
4
|
from enum import Enum, StrEnum, auto
|
5
5
|
from typing import List, Self
|
6
6
|
|
@@ -47,6 +47,9 @@ class CaseConfigParamType(Enum):
|
|
47
47
|
probes = "probes"
|
48
48
|
quantizationType = "quantization_type"
|
49
49
|
quantizationRatio = "quantization_ratio"
|
50
|
+
reranking = "reranking"
|
51
|
+
rerankingMetric = "reranking_metric"
|
52
|
+
quantizedFetchLimit = "quantized_fetch_limit"
|
50
53
|
m = "m"
|
51
54
|
nbits = "nbits"
|
52
55
|
intermediate_graph_degree = "intermediate_graph_degree"
|
@@ -64,6 +67,9 @@ class CaseConfigParamType(Enum):
|
|
64
67
|
max_parallel_workers = "max_parallel_workers"
|
65
68
|
storage_layout = "storage_layout"
|
66
69
|
num_neighbors = "num_neighbors"
|
70
|
+
max_neighbors = "max_neighbors"
|
71
|
+
l_value_ib = "l_value_ib"
|
72
|
+
l_value_is = "l_value_is"
|
67
73
|
search_list_size = "search_list_size"
|
68
74
|
max_alpha = "max_alpha"
|
69
75
|
num_dimensions = "num_dimensions"
|
@@ -163,16 +169,20 @@ class TestResult(BaseModel):
|
|
163
169
|
results: list[CaseResult]
|
164
170
|
|
165
171
|
file_fmt: str = "result_{}_{}_{}.json" # result_20230718_standard_milvus.json
|
172
|
+
timestamp: float = 0.0
|
166
173
|
|
167
174
|
def flush(self):
|
168
175
|
db2case = self.get_db_results()
|
169
|
-
|
176
|
+
timestamp = datetime.combine(date.today(), datetime.min.time()).timestamp()
|
170
177
|
result_root = config.RESULTS_LOCAL_DIR
|
171
178
|
for db, result in db2case.items():
|
172
179
|
self.write_db_file(
|
173
180
|
result_dir=result_root.joinpath(db.value),
|
174
181
|
partial=TestResult(
|
175
|
-
run_id=self.run_id,
|
182
|
+
run_id=self.run_id,
|
183
|
+
task_label=self.task_label,
|
184
|
+
results=result,
|
185
|
+
timestamp=timestamp,
|
176
186
|
),
|
177
187
|
db=db.value.lower(),
|
178
188
|
)
|