vectordb-bench 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
- vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
- vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
- vectordb_bench/backend/clients/pgvector/cli.py +17 -2
- vectordb_bench/backend/clients/pgvector/config.py +20 -5
- vectordb_bench/backend/clients/pgvector/pgvector.py +95 -25
- vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +22 -4
- vectordb_bench/backend/clients/pinecone/config.py +0 -2
- vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
- vectordb_bench/backend/clients/redis/cli.py +8 -0
- vectordb_bench/backend/clients/redis/config.py +37 -6
- vectordb_bench/backend/runner/mp_runner.py +2 -1
- vectordb_bench/cli/cli.py +137 -0
- vectordb_bench/cli/vectordbbench.py +2 -1
- vectordb_bench/frontend/components/check_results/charts.py +9 -6
- vectordb_bench/frontend/components/concurrent/charts.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +57 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
- vectordb_bench/frontend/vdb_benchmark.py +11 -3
- vectordb_bench/models.py +7 -3
- vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
- vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
- vectordb_bench/results/getLeaderboardData.py +17 -7
- vectordb_bench/results/leaderboard.json +1 -1
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/METADATA +60 -35
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/RECORD +34 -33
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/pinecone/pinecone.py
CHANGED
@@ -3,7 +3,7 @@
 import logging
 from contextlib import contextmanager
 from typing import Type
-
+import pinecone
 from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
 from .config import PineconeConfig
@@ -11,7 +11,8 @@ from .config import PineconeConfig
 log = logging.getLogger(__name__)

 PINECONE_MAX_NUM_PER_BATCH = 1000
-PINECONE_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024
+PINECONE_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB
+

 class Pinecone(VectorDB):
     def __init__(
@@ -23,30 +24,25 @@ class Pinecone(VectorDB):
         **kwargs,
     ):
         """Initialize wrapper around the milvus vector database."""
-        self.index_name = db_config
-        self.api_key = db_config
-        self.
-
-
-
-
-
-
+        self.index_name = db_config.get("index_name", "")
+        self.api_key = db_config.get("api_key", "")
+        self.batch_size = int(
+            min(PINECONE_MAX_SIZE_PER_BATCH / (dim * 5), PINECONE_MAX_NUM_PER_BATCH)
+        )
+
+        pc = pinecone.Pinecone(api_key=self.api_key)
+        index = pc.Index(self.index_name)
+
         if drop_old:
-
-
-
-            index_dim = index.describe_index_stats()["dimension"]
-            if (index_dim != dim):
-                raise ValueError(
-                    f"Pinecone index {self.index_name} dimension mismatch, expected {index_dim} got {dim}")
-            log.info(
-                f"Pinecone client delete old index: {self.index_name}")
-            index.delete(delete_all=True)
-            index.close()
-        else:
+            index_stats = index.describe_index_stats()
+            index_dim = index_stats["dimension"]
+            if index_dim != dim:
                 raise ValueError(
-                    f"Pinecone index {self.index_name}
+                    f"Pinecone index {self.index_name} dimension mismatch, expected {index_dim} got {dim}"
+                )
+            for namespace in index_stats["namespaces"]:
+                log.info(f"Pinecone index delete namespace: {namespace}")
+                index.delete(delete_all=True, namespace=namespace)

         self._metadata_key = "meta"

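The batch-size heuristic above targets the 2 MB request cap by assuming roughly 5 bytes per vector element. A quick standalone check of the arithmetic (the dimension is an example value):

    PINECONE_MAX_NUM_PER_BATCH = 1000
    PINECONE_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB

    dim = 1536  # example: OpenAI ada-002 embedding size
    batch_size = int(min(PINECONE_MAX_SIZE_PER_BATCH / (dim * 5), PINECONE_MAX_NUM_PER_BATCH))
    print(batch_size)  # 273 -> size-limited; for dim <= 419 the 1000-vector cap wins instead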
@@ -59,13 +55,10 @@ class Pinecone(VectorDB):
         return EmptyDBCaseConfig

     @contextmanager
-    def init(self)
-
-
-        api_key=self.api_key, environment=self.environment)
-        self.index = pinecone.Index(self.index_name)
+    def init(self):
+        pc = pinecone.Pinecone(api_key=self.api_key)
+        self.index = pc.Index(self.index_name)
         yield
-        self.index.close()

     def ready_to_load(self):
         pass
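Both hunks migrate from the legacy module-level client (pinecone.init(...) plus pinecone.Index(...), with an explicit index.close()) to the v3-style client object, which has no close() to call. A minimal sketch of the new usage; the API key and index name are placeholders:

    import pinecone

    pc = pinecone.Pinecone(api_key="YOUR_API_KEY")  # placeholder key
    index = pc.Index("example-index")               # placeholder index name
    stats = index.describe_index_stats()            # exposes "dimension" and "namespaces"
    print(stats["dimension"], list(stats["namespaces"]))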
@@ -83,11 +76,16 @@ class Pinecone(VectorDB):
         insert_count = 0
         try:
             for batch_start_offset in range(0, len(embeddings), self.batch_size):
-                batch_end_offset = min(
+                batch_end_offset = min(
+                    batch_start_offset + self.batch_size, len(embeddings)
+                )
                 insert_datas = []
                 for i in range(batch_start_offset, batch_end_offset):
-                    insert_data = (
-
+                    insert_data = (
+                        str(metadata[i]),
+                        embeddings[i],
+                        {self._metadata_key: metadata[i]},
+                    )
                     insert_datas.append(insert_data)
                 self.index.upsert(insert_datas)
                 insert_count += batch_end_offset - batch_start_offset
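The reformatted insert_data above is the (id, values, metadata) tuple form that Pinecone's upsert accepts; ids must be strings, hence str(metadata[i]). A toy sketch of one batch (data values are made up):

    embeddings = [[0.1, 0.2], [0.3, 0.4]]
    metadata = [101, 102]

    insert_datas = [
        (str(metadata[i]), embeddings[i], {"meta": metadata[i]})  # (id, vector, metadata)
        for i in range(len(embeddings))
    ]
    # self.index.upsert(insert_datas) would then send this batch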
@@ -101,7 +99,7 @@ class Pinecone(VectorDB):
         k: int = 100,
         filters: dict | None = None,
         timeout: int | None = None,
-    ) -> list[
+    ) -> list[int]:
         if filters is None:
             pinecone_filters = {}
         else:
@@ -111,9 +109,9 @@ class Pinecone(VectorDB):
                 top_k=k,
                 vector=query,
                 filter=pinecone_filters,
-            )[
+            )["matches"]
         except Exception as e:
             print(f"Error querying index: {e}")
             raise e
-        id_res = [int(one_res[
+        id_res = [int(one_res["id"]) for one_res in res]
         return id_res
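The query response is dict-like: its "matches" list holds per-hit records whose "id" fields are strings, which the hunk converts back to ints. A sketch against a mocked response of that shape:

    res = {"matches": [{"id": "7", "score": 0.93}, {"id": "42", "score": 0.88}]}  # mocked shape
    id_res = [int(one_res["id"]) for one_res in res["matches"]]
    print(id_res)  # [7, 42]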
vectordb_bench/backend/clients/redis/cli.py
CHANGED
@@ -3,6 +3,9 @@ from typing import Annotated, TypedDict, Unpack
 import click
 from pydantic import SecretStr

+from .config import RedisHNSWConfig
+
+
 from ....cli.cli import (
     CommonTypedDict,
     HNSWFlavor2,
@@ -69,6 +72,11 @@ def Redis(**parameters: Unpack[RedisHNSWTypedDict]):
             ssl=parameters["ssl"],
             ssl_ca_certs=parameters["ssl_ca_certs"],
             cmd=parameters["cmd"],
+        ),
+        db_case_config=RedisHNSWConfig(
+            M=parameters["m"],
+            efConstruction=parameters["ef_construction"],
+            ef=parameters["ef_runtime"],
         ),
         **parameters,
     )
vectordb_bench/backend/clients/redis/config.py
CHANGED
@@ -1,14 +1,45 @@
-from pydantic import SecretStr
-from ..api import DBConfig
+from pydantic import SecretStr, BaseModel
+from ..api import DBConfig, DBCaseConfig, MetricType, IndexType

 class RedisConfig(DBConfig):
-    password: SecretStr
+    password: SecretStr | None = None
     host: SecretStr
-    port: int = None
+    port: int | None = None

     def to_dict(self) -> dict:
         return {
             "host": self.host.get_secret_value(),
             "port": self.port,
-            "password": self.password.get_secret_value(),
-        }
+            "password": self.password.get_secret_value() if self.password is not None else None,
+        }
+
+
+
+class RedisIndexConfig(BaseModel):
+    """Base config for milvus"""
+
+    metric_type: MetricType | None = None
+
+    def parse_metric(self) -> str:
+        if not self.metric_type:
+            return ""
+        return self.metric_type.value
+
+class RedisHNSWConfig(RedisIndexConfig, DBCaseConfig):
+    M: int
+    efConstruction: int
+    ef: int | None = None
+    index: IndexType = IndexType.HNSW
+
+    def index_param(self) -> dict:
+        return {
+            "metric_type": self.parse_metric(),
+            "index_type": self.index.value,
+            "params": {"M": self.M, "efConstruction": self.efConstruction},
+        }
+
+    def search_param(self) -> dict:
+        return {
+            "metric_type": self.parse_metric(),
+            "params": {"ef": self.ef},
+        }
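The new RedisHNSWConfig follows the DBCaseConfig pattern used by the other clients: index_param() feeds index creation and search_param() feeds queries. A sketch of what the two methods return, assuming MetricType.COSINE.value is "COSINE" and IndexType.HNSW.value is "HNSW":

    from vectordb_bench.backend.clients.api import MetricType
    from vectordb_bench.backend.clients.redis.config import RedisHNSWConfig

    config = RedisHNSWConfig(M=16, efConstruction=256, ef=256, metric_type=MetricType.COSINE)
    print(config.index_param())
    # {'metric_type': 'COSINE', 'index_type': 'HNSW', 'params': {'M': 16, 'efConstruction': 256}}
    print(config.search_param())
    # {'metric_type': 'COSINE', 'params': {'ef': 256}}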
vectordb_bench/backend/runner/mp_runner.py
CHANGED
@@ -2,6 +2,7 @@ import time
 import traceback
 import concurrent
 import multiprocessing as mp
+import random
 import logging
 from typing import Iterable
 import numpy as np
@@ -46,7 +47,7 @@ class MultiProcessingSearchRunner:
             cond.wait()

         with self.db.init():
-            num, idx = len(test_data), 0
+            num, idx = len(test_data), random.randint(0, len(test_data) - 1)

             start_time = time.perf_counter()
             count = 0
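Starting each worker at a random offset rather than index 0 decorrelates concurrent workers, so they stop issuing the identical query sequence in lock-step. A standalone sketch of the pattern (the wrap-around step is an assumption about the surrounding search loop):

    import random

    test_data = [[0.1], [0.2], [0.3], [0.4]]
    num, idx = len(test_data), random.randint(0, len(test_data) - 1)
    for _ in range(num):
        query = test_data[idx]   # the search for this query would run here
        idx = (idx + 1) % num    # assumed wrap-around through the whole set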
vectordb_bench/cli/cli.py
CHANGED
@@ -17,6 +17,8 @@ from typing import (
     Any,
 )
 import click
+
+from vectordb_bench.backend.clients.api import MetricType
 from .. import config
 from ..backend.clients import DB
 from ..interface import benchMarkRunner, global_result_future
@@ -147,6 +149,37 @@ def parse_task_stages(
     return stages


+def check_custom_case_parameters(ctx, param, value):
+    if ctx.params.get("case_type") == "PerformanceCustomDataset":
+        if value is None:
+            raise click.BadParameter("Custom case parameters\
+                \n--custom-case-name\n--custom-dataset-name\n--custom-dataset-dir\n--custom-dataset-size \
+                \n--custom-dataset-dim\n--custom-dataset-file-count\n are required")
+    return value
+
+
+def get_custom_case_config(parameters: dict) -> dict:
+    custom_case_config = {}
+    if parameters["case_type"] == "PerformanceCustomDataset":
+        custom_case_config = {
+            "name": parameters["custom_case_name"],
+            "description": parameters["custom_case_description"],
+            "load_timeout": parameters["custom_case_load_timeout"],
+            "optimize_timeout": parameters["custom_case_optimize_timeout"],
+            "dataset_config": {
+                "name": parameters["custom_dataset_name"],
+                "dir": parameters["custom_dataset_dir"],
+                "size": parameters["custom_dataset_size"],
+                "dim": parameters["custom_dataset_dim"],
+                "metric_type": parameters["custom_dataset_metric_type"],
+                "file_count": parameters["custom_dataset_file_count"],
+                "use_shuffled": parameters["custom_dataset_use_shuffled"],
+                "with_gt": parameters["custom_dataset_with_gt"],
+            }
+        }
+    return custom_case_config
+
+
 log = logging.getLogger(__name__)


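check_custom_case_parameters rejects a missing flag only when --case-type is PerformanceCustomDataset, and get_custom_case_config assembles the flags (--custom-case-name, --custom-dataset-dir, and so on) into the nested dict that run() forwards as custom_case. A sketch of the output for hypothetical flag values:

    parameters = {
        "case_type": "PerformanceCustomDataset",
        "custom_case_name": "my-case",               # hypothetical values from here down
        "custom_case_description": "This is a customized dataset.",
        "custom_case_load_timeout": 36000,
        "custom_case_optimize_timeout": 36000,
        "custom_dataset_name": "my-dataset",
        "custom_dataset_dir": "/data/my-dataset",
        "custom_dataset_size": 100000,
        "custom_dataset_dim": 768,
        "custom_dataset_metric_type": "COSINE",
        "custom_dataset_file_count": 1,
        "custom_dataset_use_shuffled": False,
        "custom_dataset_with_gt": True,
    }
    config = get_custom_case_config(parameters)
    print(config["dataset_config"]["dim"])  # 768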
@@ -205,6 +238,7 @@ class CommonTypedDict(TypedDict):
         click.option(
             "--case-type",
             type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
+            is_eager=True,
             default="Performance1536D50K",
             help="Case type",
         ),
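is_eager makes click parse --case-type before the other options, so option callbacks such as check_custom_case_parameters can already read ctx.params["case_type"] when they fire. A minimal self-contained illustration (the command and option names are invented for the demo):

    import click

    def require_when_custom(ctx, param, value):
        # --case-type is eager, so it is already in ctx.params by the time this runs
        if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
            raise click.BadParameter(f"{param.name} is required for custom cases")
        return value

    @click.command()
    @click.option("--case-type", is_eager=True, default="Performance1536D50K")
    @click.option("--custom-case-name", callback=require_when_custom)
    def demo(case_type, custom_case_name):
        click.echo(f"{case_type}: {custom_case_name}")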
@@ -258,6 +292,108 @@ class CommonTypedDict(TypedDict):
             callback=lambda *args: list(map(int, click_arg_split(*args))),
         ),
     ]
+    custom_case_name: Annotated[
+        str,
+        click.option(
+            "--custom-case-name",
+            help="Custom dataset case name",
+            callback=check_custom_case_parameters,
+        )
+    ]
+    custom_case_description: Annotated[
+        str,
+        click.option(
+            "--custom-case-description",
+            help="Custom dataset case description",
+            default="This is a customized dataset.",
+            show_default=True,
+        )
+    ]
+    custom_case_load_timeout: Annotated[
+        int,
+        click.option(
+            "--custom-case-load-timeout",
+            help="Custom dataset case load timeout",
+            default=36000,
+            show_default=True,
+        )
+    ]
+    custom_case_optimize_timeout: Annotated[
+        int,
+        click.option(
+            "--custom-case-optimize-timeout",
+            help="Custom dataset case optimize timeout",
+            default=36000,
+            show_default=True,
+        )
+    ]
+    custom_dataset_name: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-name",
+            help="Custom dataset name",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_dir: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-dir",
+            help="Custom dataset directory",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_size: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-size",
+            help="Custom dataset size",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_dim: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-dim",
+            help="Custom dataset dimension",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_metric_type: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-metric-type",
+            help="Custom dataset metric type",
+            default=MetricType.COSINE.name,
+            show_default=True,
+        ),
+    ]
+    custom_dataset_file_count: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-file-count",
+            help="Custom dataset file count",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_use_shuffled: Annotated[
+        bool,
+        click.option(
+            "--custom-dataset-use-shuffled/--skip-custom-dataset-use-shuffled",
+            help="Custom dataset use shuffled",
+            default=False,
+            show_default=True,
+        ),
+    ]
+    custom_dataset_with_gt: Annotated[
+        bool,
+        click.option(
+            "--custom-dataset-with-gt/--skip-custom-dataset-with-gt",
+            help="Custom dataset with ground truth",
+            default=True,
+            show_default=True,
+        ),
+    ]


 class HNSWBaseTypedDict(TypedDict):
@@ -343,6 +479,7 @@ def run(
                 concurrency_duration=parameters["concurrency_duration"],
                 num_concurrency=[int(s) for s in parameters["num_concurrency"]],
             ),
+            custom_case=parameters.get("custom_case", {}),
         ),
         stages=parse_task_stages(
             (
vectordb_bench/cli/vectordbbench.py
CHANGED
@@ -1,5 +1,6 @@
 from ..backend.clients.pgvector.cli import PgVectorHNSW
 from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
+from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
 from ..backend.clients.redis.cli import Redis
 from ..backend.clients.memorydb.cli import MemoryDB
 from ..backend.clients.test.cli import Test
@@ -8,7 +9,6 @@ from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
 from ..backend.clients.milvus.cli import MilvusAutoIndex
 from ..backend.clients.aws_opensearch.cli import AWSOpenSearch

-
 from .cli import cli

 cli.add_command(PgVectorHNSW)
@@ -21,6 +21,7 @@ cli.add_command(Test)
 cli.add_command(ZillizAutoIndex)
 cli.add_command(MilvusAutoIndex)
 cli.add_command(AWSOpenSearch)
+cli.add_command(PgVectorScaleDiskAnn)


 if __name__ == "__main__":
vectordb_bench/frontend/components/check_results/charts.py
CHANGED
@@ -1,5 +1,7 @@
 from vectordb_bench.backend.cases import Case
-from vectordb_bench.frontend.components.check_results.expanderStyle import
+from vectordb_bench.frontend.components.check_results.expanderStyle import (
+    initMainExpanderStyle,
+)
 from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
 from vectordb_bench.frontend.config.styles import *
 from vectordb_bench.models import ResultLabel
@@ -11,7 +13,7 @@ def drawCharts(st, allData, failedTasks, caseNames: list[str]):
     for caseName in caseNames:
         chartContainer = st.expander(caseName, True)
         data = [data for data in allData if data["case_name"] == caseName]
-        drawChart(data, chartContainer)
+        drawChart(data, chartContainer, key_prefix=caseName)

         errorDBs = failedTasks[caseName]
         showFailedDBs(chartContainer, errorDBs)
@@ -35,7 +37,7 @@ def showFailedText(st, text, dbs):
     )


-def drawChart(data, st):
+def drawChart(data, st, key_prefix: str):
     metricsSet = set()
     for d in data:
         metricsSet = metricsSet.union(d["metricsSet"])
@@ -43,7 +45,8 @@ def drawChart(data, st):

     for i, metric in enumerate(showMetrics):
         container = st.container()
-
+        key = f"{key_prefix}-{metric}"
+        drawMetricChart(data, metric, container, key=key)


 def getLabelToShapeMap(data):
@@ -75,7 +78,7 @@ def getLabelToShapeMap(data):
     return labelToShapeMap


-def drawMetricChart(data, metric, st):
+def drawMetricChart(data, metric, st, key: str):
     dataWithMetric = [d for d in data if d.get(metric, 0) > 1e-7]
     # dataWithMetric = data
     if len(dataWithMetric) == 0:
@@ -161,4 +164,4 @@ def drawMetricChart(data, metric, st):
         ),
     )

-    chart.plotly_chart(fig, use_container_width=True)
+    chart.plotly_chart(fig, use_container_width=True, key=key)
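Threading key_prefix/key down to st.plotly_chart gives every chart a unique element key; when the same drawing code runs once per case and metric, identical charts without explicit keys can collide in Streamlit's element registry. A reduced sketch of the pattern (chart data is made up):

    import streamlit as st
    import plotly.graph_objects as go

    fig = go.Figure(go.Bar(x=["db-a", "db-b"], y=[1200, 950]))
    for case_name in ["case-1", "case-2"]:
        for metric in ["qps", "recall"]:
            # key is unique per (case, metric) pair, mirroring f"{key_prefix}-{metric}" above
            st.plotly_chart(fig, use_container_width=True, key=f"{case_name}-{metric}")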
vectordb_bench/frontend/components/concurrent/charts.py
CHANGED
@@ -22,7 +22,7 @@ def drawChartsByCase(allData, showCaseNames: list[str], st):
         for caseData in caseDataList
         for i in range(len(caseData["conc_num_list"]))
     ]
-    drawChart(data, chartContainer)
+    drawChart(data, chartContainer, key=f"{caseName}-qps-p99")


 def getRange(metric, data, padding_multipliers):
@@ -36,7 +36,7 @@ def getRange(metric, data, padding_multipliers):
     return rangeV


-def drawChart(data, st):
+def drawChart(data, st, key: str):
     if len(data) == 0:
         return

@@ -73,7 +73,4 @@ def drawChart(data, st):
     fig.update_yaxes(range=yrange, title_text="QPS")
     fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")

-    st.plotly_chart(
-        fig,
-        use_container_width=True,
-    )
+    st.plotly_chart(fig, use_container_width=True, key=key)
vectordb_bench/frontend/config/dbCaseConfigs.py
CHANGED
@@ -360,6 +360,37 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
     },
 )

+CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.EFConstruction,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 100,
+        "max": 1024,
+        "value": 256,
+    },
+)
+
+CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.M,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 4,
+        "max": 64,
+        "value": 16,
+    },
+)
+
+CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.ef_search,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 100,
+        "max": 1024,
+        "value": 256,
+    },
+)
+
+
 CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
     label=CaseConfigParamType.maintenance_work_mem,
     inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
@@ -738,6 +769,19 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
     ],
 )

+CaseConfigParamInput_QuantizationType_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.quantizationType,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["none", "halfvec"],
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    in [
+        IndexType.HNSW.value,
+        IndexType.IVFFlat.value,
+    ],
+)
+
 CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
     label=CaseConfigParamType.quantizationRatio,
     inputType=InputType.Option,
@@ -826,11 +870,19 @@ ESPerformanceConfig = [
     CaseConfigParamInput_NumCandidates_ES,
 ]

+AWSOpensearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch]
+AWSOpenSearchPerformanceConfig = [
+    CaseConfigParamInput_EFConstruction_AWSOpensearch,
+    CaseConfigParamInput_M_AWSOpensearch,
+    CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
+]
+
 PgVectorLoadingConfig = [
     CaseConfigParamInput_IndexType_PgVector,
     CaseConfigParamInput_Lists_PgVector,
     CaseConfigParamInput_m,
     CaseConfigParamInput_EFConstruction_PgVector,
+    CaseConfigParamInput_QuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
 ]
@@ -841,6 +893,7 @@ PgVectorPerformanceConfig = [
     CaseConfigParamInput_EFSearch_PgVector,
     CaseConfigParamInput_Lists_PgVector,
     CaseConfigParamInput_Probes_PgVector,
+    CaseConfigParamInput_QuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
 ]
@@ -905,6 +958,10 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: ESLoadingConfig,
         CaseLabel.Performance: ESPerformanceConfig,
     },
+    DB.AWSOpenSearch: {
+        CaseLabel.Load: AWSOpensearchLoadingConfig,
+        CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
+    },
     DB.PgVector: {
         CaseLabel.Load: PgVectorLoadingConfig,
         CaseLabel.Performance: PgVectorPerformanceConfig,
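The isDisplayed lambda on the new pgvector quantization option only shows it when the chosen index type is HNSW or IVFFlat. A toy version of the same gating with plain strings standing in for the enum values (an assumption about what they resolve to):

    def is_displayed(config: dict) -> bool:
        # mirrors the lambda above; "HNSW"/"IVFFLAT" are assumed enum values
        return config.get("IndexType") in ["HNSW", "IVFFLAT"]

    print(is_displayed({"IndexType": "HNSW"}))     # True
    print(is_displayed({"IndexType": "DISKANN"}))  # False
    print(is_displayed({}))                        # False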
vectordb_bench/frontend/pages/quries_per_dollar.py
CHANGED
@@ -1,10 +1,17 @@
 import streamlit as st
 from vectordb_bench.frontend.components.check_results.footer import footer
-from vectordb_bench.frontend.components.check_results.expanderStyle import
+from vectordb_bench.frontend.components.check_results.expanderStyle import (
+    initMainExpanderStyle,
+)
 from vectordb_bench.frontend.components.check_results.priceTable import priceTable
-from vectordb_bench.frontend.components.check_results.stPageConfig import
+from vectordb_bench.frontend.components.check_results.stPageConfig import (
+    initResultsPageConfig,
+)
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
-from vectordb_bench.frontend.components.check_results.nav import
+from vectordb_bench.frontend.components.check_results.nav import (
+    NavToResults,
+    NavToRunTest,
+)
 from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -16,7 +23,7 @@ from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
 def main():
     # set page config
     initResultsPageConfig(st)
-
+
     # header
     drawHeaderIcon(st)

@@ -57,7 +64,8 @@ def main():
             dataWithMetric.append(d)
         if len(dataWithMetric) > 0:
             chartContainer = st.expander(caseName, True)
-
+            key = f"{caseName}-{metric}"
+            drawMetricChart(data, metric, chartContainer, key=key)

     # footer
     footer(st.container())
vectordb_bench/frontend/vdb_benchmark.py
CHANGED
@@ -1,8 +1,13 @@
 import streamlit as st
 from vectordb_bench.frontend.components.check_results.footer import footer
-from vectordb_bench.frontend.components.check_results.stPageConfig import
+from vectordb_bench.frontend.components.check_results.stPageConfig import (
+    initResultsPageConfig,
+)
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
-from vectordb_bench.frontend.components.check_results.nav import
+from vectordb_bench.frontend.components.check_results.nav import (
+    NavToQuriesPerDollar,
+    NavToRunTest,
+)
 from vectordb_bench.frontend.components.check_results.charts import drawCharts
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -20,7 +25,10 @@ def main():
     allResults = benchMarkRunner.get_results()

     st.title("Vector Database Benchmark")
-    st.caption(
+    st.caption(
+        "Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
+    )
+    st.caption("All tested milvus are in _standalone_ mode.")

     # results selector and filter
     resultSelectorContainer = st.sidebar.container()
vectordb_bench/models.py
CHANGED
@@ -1,6 +1,6 @@
 import logging
 import pathlib
-from datetime import date
+from datetime import date, datetime
 from enum import Enum, StrEnum, auto
 from typing import List, Self

@@ -163,16 +163,20 @@ class TestResult(BaseModel):
     results: list[CaseResult]

     file_fmt: str = "result_{}_{}_{}.json"  # result_20230718_statndard_milvus.json
+    timestamp: float = 0.0

     def flush(self):
         db2case = self.get_db_results()
-
+        timestamp = datetime.combine(date.today(), datetime.min.time()).timestamp()
         result_root = config.RESULTS_LOCAL_DIR
         for db, result in db2case.items():
             self.write_db_file(
                 result_dir=result_root.joinpath(db.value),
                 partial=TestResult(
-                    run_id=self.run_id,
+                    run_id=self.run_id,
+                    task_label=self.task_label,
+                    results=result,
+                    timestamp=timestamp,
                 ),
                 db=db.value.lower(),
             )