vectordb-bench 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +18 -5
- vectordb_bench/backend/cases.py +32 -12
- vectordb_bench/backend/clients/__init__.py +1 -0
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/milvus/cli.py +291 -0
- vectordb_bench/backend/clients/milvus/milvus.py +13 -6
- vectordb_bench/backend/clients/pgvector/cli.py +116 -0
- vectordb_bench/backend/clients/pgvector/config.py +1 -1
- vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
- vectordb_bench/backend/clients/redis/cli.py +74 -0
- vectordb_bench/backend/clients/test/cli.py +25 -0
- vectordb_bench/backend/clients/test/config.py +18 -0
- vectordb_bench/backend/clients/test/test.py +62 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
- vectordb_bench/backend/runner/mp_runner.py +14 -3
- vectordb_bench/backend/runner/serial_runner.py +7 -3
- vectordb_bench/backend/task_runner.py +76 -26
- vectordb_bench/cli/__init__.py +0 -0
- vectordb_bench/cli/cli.py +362 -0
- vectordb_bench/cli/vectordbbench.py +20 -0
- vectordb_bench/config-files/sample_config.yml +17 -0
- vectordb_bench/frontend/components/check_results/data.py +11 -8
- vectordb_bench/frontend/components/concurrent/charts.py +82 -0
- vectordb_bench/frontend/components/run_test/dbSelector.py +7 -1
- vectordb_bench/frontend/components/run_test/submitTask.py +12 -4
- vectordb_bench/frontend/components/tables/data.py +44 -0
- vectordb_bench/frontend/const/dbCaseConfigs.py +2 -1
- vectordb_bench/frontend/pages/concurrent.py +72 -0
- vectordb_bench/frontend/pages/tables.py +24 -0
- vectordb_bench/interface.py +21 -25
- vectordb_bench/metric.py +23 -1
- vectordb_bench/models.py +45 -5
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/METADATA +193 -2
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/RECORD +39 -23
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/entry_points.txt +1 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.10.dist-info → vectordb_bench-0.0.11.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ b/vectordb_bench/cli/cli.py
@@ -0,0 +1,362 @@
+import logging
+import time
+from concurrent.futures import wait
+from datetime import datetime
+from pprint import pformat
+from typing import (
+    Annotated,
+    Callable,
+    List,
+    Optional,
+    Type,
+    TypedDict,
+    Unpack,
+    get_origin,
+    get_type_hints,
+    Dict,
+    Any,
+)
+import click
+from .. import config
+from ..backend.clients import DB
+from ..interface import benchMarkRunner, global_result_future
+from ..models import (
+    CaseConfig,
+    CaseType,
+    ConcurrencySearchConfig,
+    DBCaseConfig,
+    DBConfig,
+    TaskConfig,
+    TaskStage,
+)
+import os
+from yaml import load
+try:
+    from yaml import CLoader as Loader
+except ImportError:
+    from yaml import Loader
+
+
+def click_get_defaults_from_file(ctx, param, value):
+    if value:
+        if os.path.exists(value):
+            input_file = value
+        else:
+            input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
+        try:
+            with open(input_file, 'r') as f:
+                _config: Dict[str, Dict[str, Any]] = load(f.read(), Loader=Loader)
+                ctx.default_map = _config.get(ctx.command.name, {})
+        except Exception as e:
+            raise click.BadParameter(f"Failed to load config file: {e}")
+    return value
+
+
+def click_parameter_decorators_from_typed_dict(
+    typed_dict: Type,
+) -> Callable[[click.decorators.FC], click.decorators.FC]:
+    """A convenience method decorator that will read in a TypedDict with parameters defined by Annotated types.
+    from .models import CaseConfig, CaseType, DBCaseConfig, DBConfig, TaskConfig, TaskStage
+    The click.options will be collected and re-composed as a single decorator to apply to the click.command.
+
+    Args:
+        typed_dict (TypedDict) with Annotated[..., click.option()] keys
+
+    Returns:
+        a fully decorated method
+
+
+    For clarity, the key names of the TypedDict will be used to determine the type hints for the input parameters.
+    The actual function parameters are controlled by the click.option definitions. You must manually ensure these are aligned in a sensible way!
+
+    Example:
+    ```
+    class CommonTypedDict(TypedDict):
+        z: Annotated[int, click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)]
+        name: Annotated[str, click.argument("name", required=False, default="Jeff")]
+
+    class FooTypedDict(CommonTypedDict):
+        x: Annotated[int, click.option("--x", type=int, help="help x", default=1, show_default=True)]
+        y: Annotated[str, click.option("--y", type=str, help="help y", default="foo", show_default=True)]
+
+    @cli.command()
+    @click_parameter_decorators_from_typed_dict(FooTypedDict)
+    def foo(**parameters: Unpack[FooTypedDict]):
+        "Foo docstring"
+        print(f"input parameters: {parameters["x"]}")
+    ```
+    """
+    decorators = []
+    for _, t in get_type_hints(typed_dict, include_extras=True).items():
+        assert get_origin(t) is Annotated
+        if (
+            len(t.__metadata__) == 1
+            and t.__metadata__[0].__module__ == "click.decorators"
+        ):
+            # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]] with no additional metadata defined (len=1)
+            decorators.append(t.__metadata__[0])
+        else:
+            raise RuntimeError(
+                "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring"
+            )
+
+    def deco(f):
+        for dec in reversed(decorators):
+            f = dec(f)
+        return f
+
+    return deco
+
+
+def click_arg_split(ctx: click.Context, param: click.core.Option, value):
+    """Will split a comma-separated list input into an actual list.
+
+    Args:
+        ctx (...): unused click arg
+        param (...): unused click arg
+        value (str): input comma-separated list
+
+    Returns:
+        value (List[str]): list of original
+    """
+    # split columns by ',' and remove whitespace
+    if value is None:
+        return []
+    return [c.strip() for c in value.split(",") if c.strip()]
+
+
+def parse_task_stages(
+    drop_old: bool,
+    load: bool,
+    search_serial: bool,
+    search_concurrent: bool,
+) -> List[TaskStage]:
+    stages = []
+    if load and not drop_old:
+        raise RuntimeError("Dropping old data cannot be skipped if loading data")
+    elif drop_old and not load:
+        raise RuntimeError("Load cannot be skipped if dropping old data")
+    if drop_old:
+        stages.append(TaskStage.DROP_OLD)
+    if load:
+        stages.append(TaskStage.LOAD)
+    if search_serial:
+        stages.append(TaskStage.SEARCH_SERIAL)
+    if search_concurrent:
+        stages.append(TaskStage.SEARCH_CONCURRENT)
+    return stages
+
+
+log = logging.getLogger(__name__)
+
+
+class CommonTypedDict(TypedDict):
+    config_file: Annotated[
+        bool,
+        click.option('--config-file',
+                     type=click.Path(),
+                     callback=click_get_defaults_from_file,
+                     is_eager=True,
+                     expose_value=False,
+                     help='Read configuration from yaml file'),
+    ]
+    drop_old: Annotated[
+        bool,
+        click.option(
+            "--drop-old/--skip-drop-old",
+            type=bool,
+            default=True,
+            help="Drop old or skip",
+            show_default=True,
+        ),
+    ]
+    load: Annotated[
+        bool,
+        click.option(
+            "--load/--skip-load",
+            type=bool,
+            default=True,
+            help="Load or skip",
+            show_default=True,
+        ),
+    ]
+    search_serial: Annotated[
+        bool,
+        click.option(
+            "--search-serial/--skip-search-serial",
+            type=bool,
+            default=True,
+            help="Search serial or skip",
+            show_default=True,
+        ),
+    ]
+    search_concurrent: Annotated[
+        bool,
+        click.option(
+            "--search-concurrent/--skip-search-concurrent",
+            type=bool,
+            default=True,
+            help="Search concurrent or skip",
+            show_default=True,
+        ),
+    ]
+    case_type: Annotated[
+        str,
+        click.option(
+            "--case-type",
+            type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
+            default="Performance1536D50K",
+            help="Case type",
+        ),
+    ]
+    db_label: Annotated[
+        str,
+        click.option(
+            "--db-label", type=str, help="Db label, default: date in ISO format",
+            show_default=True,
+            default=datetime.now().isoformat()
+        ),
+    ]
+    dry_run: Annotated[
+        bool,
+        click.option(
+            "--dry-run",
+            type=bool,
+            default=False,
+            is_flag=True,
+            help="Print just the configuration and exit without running the tasks",
+        ),
+    ]
+    k: Annotated[
+        int,
+        click.option(
+            "--k",
+            type=int,
+            default=config.K_DEFAULT,
+            show_default=True,
+            help="K value for number of nearest neighbors to search",
+        ),
+    ]
+    concurrency_duration: Annotated[
+        int,
+        click.option(
+            "--concurrency-duration",
+            type=int,
+            default=config.CONCURRENCY_DURATION,
+            show_default=True,
+            help="Adjusts the duration in seconds of each concurrency search",
+        ),
+    ]
+    num_concurrency: Annotated[
+        List[str],
+        click.option(
+            "--num-concurrency",
+            type=str,
+            help="Comma-separated list of concurrency values to test during concurrent search",
+            show_default=True,
+            default=",".join(map(str, config.NUM_CONCURRENCY)),
+            callback=lambda *args: list(map(int, click_arg_split(*args))),
+        ),
+    ]
+
+
+class HNSWBaseTypedDict(TypedDict):
+    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m")]
+    ef_construction: Annotated[
+        Optional[int],
+        click.option("--ef-construction", type=int, help="hnsw ef-construction"),
+    ]
+
+
+class HNSWBaseRequiredTypedDict(TypedDict):
+    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m", required=True)]
+    ef_construction: Annotated[
+        Optional[int],
+        click.option("--ef-construction", type=int, help="hnsw ef-construction", required=True),
+    ]
+
+
+class HNSWFlavor1(HNSWBaseTypedDict):
+    ef_search: Annotated[
+        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search")
+    ]
+
+
+class HNSWFlavor2(HNSWBaseTypedDict):
+    ef_runtime: Annotated[
+        Optional[int], click.option("--ef-runtime", type=int, help="hnsw ef-runtime")
+    ]
+
+
+class HNSWFlavor3(HNSWBaseRequiredTypedDict):
+    ef_search: Annotated[
+        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search", required=True)
+    ]
+
+
+class IVFFlatTypedDict(TypedDict):
+    lists: Annotated[
+        Optional[int], click.option("--lists", type=int, help="ivfflat lists")
+    ]
+    probes: Annotated[
+        Optional[int], click.option("--probes", type=int, help="ivfflat probes")
+    ]
+
+
+class IVFFlatTypedDictN(TypedDict):
+    nlist: Annotated[
+        Optional[int], click.option("--lists", "nlist", type=int, help="ivfflat lists", required=True)
+    ]
+    nprobe: Annotated[
+        Optional[int], click.option("--probes", "nprobe", type=int, help="ivfflat probes", required=True)
+    ]
+
+
+@click.group()
+def cli():
+    ...
+
+
+def run(
+    db: DB,
+    db_config: DBConfig,
+    db_case_config: DBCaseConfig,
+    **parameters: Unpack[CommonTypedDict],
+):
+    """Builds a single VectorDBBench Task and runs it, awaiting the task until finished.
+
+    Args:
+        db (DB)
+        db_config (DBConfig)
+        db_case_config (DBCaseConfig)
+        **parameters: expects keys from CommonTypedDict
+    """
+
+    task = TaskConfig(
+        db=db,
+        db_config=db_config,
+        db_case_config=db_case_config,
+        case_config=CaseConfig(
+            case_id=CaseType[parameters["case_type"]],
+            k=parameters["k"],
+            concurrency_search_config=ConcurrencySearchConfig(
+                concurrency_duration=parameters["concurrency_duration"],
+                num_concurrency=[int(s) for s in parameters["num_concurrency"]],
+            ),
+        ),
+        stages=parse_task_stages(
+            (
+                False if not parameters["load"] else parameters["drop_old"]
+            ),  # only drop old data if loading new data
+            parameters["load"],
+            parameters["search_serial"],
+            parameters["search_concurrent"],
+        ),
+    )
+
+    log.info(f"Task:\n{pformat(task)}\n")
+    if not parameters["dry_run"]:
+        benchMarkRunner.run([task])
+        time.sleep(5)
+        if global_result_future:
+            wait([global_result_future])
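The `--config-file` option in `CommonTypedDict` is eager and not exposed to the command body: its callback `click_get_defaults_from_file` loads a YAML file and installs the section named after the invoked subcommand as click's `default_map`, so values from the file become option defaults that explicit command-line flags still override. A minimal standalone sketch of that pattern (the `example` command, its options, and the `yaml.safe_load` call are illustrative, not taken from the package):

```python
import click
import yaml


def defaults_from_file(ctx: click.Context, param, value):
    """Install the YAML section named after the current command as click's default_map."""
    if value:
        with open(value) as f:
            data = yaml.safe_load(f) or {}
        # default_map keys are click parameter names (underscored option names)
        ctx.default_map = data.get(ctx.command.name, {})
    return value


@click.command()
@click.option("--config-file", type=click.Path(exists=True), callback=defaults_from_file,
              is_eager=True, expose_value=False, help="Read option defaults from a YAML file")
@click.option("--m", type=int, default=8, show_default=True)
@click.option("--ef-construction", type=int, default=64, show_default=True)
def example(m: int, ef_construction: int):
    click.echo(f"m={m} ef_construction={ef_construction}")


if __name__ == "__main__":
    example()
```

Invoked as `example --config-file my.yml --m 32`, the explicit `--m` wins over the file value, while `ef_construction` falls back to whatever the `example:` section of the YAML provides.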
--- /dev/null
+++ b/vectordb_bench/cli/vectordbbench.py
@@ -0,0 +1,20 @@
+from ..backend.clients.pgvector.cli import PgVectorHNSW
+from ..backend.clients.redis.cli import Redis
+from ..backend.clients.test.cli import Test
+from ..backend.clients.weaviate_cloud.cli import Weaviate
+from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
+from ..backend.clients.milvus.cli import MilvusAutoIndex
+
+
+from .cli import cli
+
+cli.add_command(PgVectorHNSW)
+cli.add_command(Redis)
+cli.add_command(Weaviate)
+cli.add_command(Test)
+cli.add_command(ZillizAutoIndex)
+cli.add_command(MilvusAutoIndex)
+
+
+if __name__ == "__main__":
+    cli()
--- /dev/null
+++ b/vectordb_bench/config-files/sample_config.yml
@@ -0,0 +1,17 @@
+pgvectorhnsw:
+  db_label: pgConfigTest
+  user_name: vectordbbench
+  db_name: vectordbbench
+  host: localhost
+  m: 16
+  ef_construction: 128
+  ef_search: 128
+milvushnsw:
+  skip_search_serial: True
+  case_type: Performance1536D50K
+  uri: http://localhost:19530
+  m: 16
+  ef_construction: 128
+  ef_search: 128
+  drop_old: False
+  load: False
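In this sample, the top-level keys name CLI subcommands (`pgvectorhnsw`, `milvushnsw`) and the nested keys are picked up as click parameter names through the `default_map` mechanism shown above. A quick, illustrative way to see which defaults a given subcommand would inherit, assuming the file is saved locally as `sample_config.yml` and PyYAML is installed:

```python
import yaml

with open("sample_config.yml") as f:
    cfg = yaml.safe_load(f)

# Each top-level key names a subcommand; its mapping becomes that command's default_map.
print(sorted(cfg))                     # ['milvushnsw', 'pgvectorhnsw']
print(cfg["pgvectorhnsw"]["m"])        # 16
print(cfg["milvushnsw"]["case_type"])  # Performance1536D50K
```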
--- a/vectordb_bench/frontend/components/check_results/data.py
+++ b/vectordb_bench/frontend/components/check_results/data.py
@@ -87,15 +87,18 @@ def mergeMetrics(metrics_1: dict, metrics_2: dict) -> dict:
 
 
 def getBetterMetric(metric, value_1, value_2):
-    if value_1 < 1e-7:
-        return value_2
-    if value_2 < 1e-7:
+    try:
+        if value_1 < 1e-7:
+            return value_2
+        if value_2 < 1e-7:
+            return value_1
+        return (
+            min(value_1, value_2)
+            if isLowerIsBetterMetric(metric)
+            else max(value_1, value_2)
+        )
+    except Exception:
         return value_1
-    return (
-        min(value_1, value_2)
-        if isLowerIsBetterMetric(metric)
-        else max(value_1, value_2)
-    )
 
 
 def getBetterLabel(label_1: ResultLabel, label_2: ResultLabel):
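The reworked `getBetterMetric` treats values below `1e-7` as missing, keeps `min` or `max` depending on the metric's direction, and now falls back to the first value whenever the comparison raises. A self-contained sketch of that merge rule, with an illustrative `is_lower_is_better` standing in for the package's `isLowerIsBetterMetric`:

```python
# Sketch of the merge rule above; the metric names here are examples, not the package's list.
LOWER_IS_BETTER = {"load_duration", "serial_latency_p99"}


def is_lower_is_better(metric: str) -> bool:
    return metric in LOWER_IS_BETTER


def better(metric: str, v1, v2):
    try:
        if v1 < 1e-7:  # near-zero means "no data"; keep the other value
            return v2
        if v2 < 1e-7:
            return v1
        return min(v1, v2) if is_lower_is_better(metric) else max(v1, v2)
    except Exception:  # non-comparable values fall back to the first one
        return v1


assert better("qps", 100.0, 250.0) == 250.0               # higher is better
assert better("serial_latency_p99", 0.01, 0.02) == 0.01   # lower is better
assert better("qps", 0.0, 250.0) == 250.0                 # zero counts as missing
```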
--- /dev/null
+++ b/vectordb_bench/frontend/components/concurrent/charts.py
@@ -0,0 +1,82 @@
+
+
+from vectordb_bench.backend.cases import Case
+from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
+import plotly.express as px
+
+from vectordb_bench.frontend.const.styles import COLOR_MAP
+
+
+def drawChartsByCase(allData, cases: list[Case], st):
+    initMainExpanderStyle(st)
+    for case in cases:
+        chartContainer = st.expander(case.name, True)
+        caseDataList = [
+            data for data in allData if data["case_name"] == case.name]
+        data = [{
+            "conc_num": caseData["conc_num_list"][i],
+            "qps": caseData["conc_qps_list"][i],
+            "latency_p99": caseData["conc_latency_p99_list"][i] * 1000,
+            "db_name": caseData["db_name"],
+            "db": caseData["db"]
+
+        } for caseData in caseDataList for i in range(len(caseData["conc_num_list"]))]
+        drawChart(data, chartContainer)
+
+
+def getRange(metric, data, padding_multipliers):
+    minV = min([d.get(metric, 0) for d in data])
+    maxV = max([d.get(metric, 0) for d in data])
+    padding = maxV - minV
+    rangeV = [
+        minV - padding * padding_multipliers[0],
+        maxV + padding * padding_multipliers[1],
+    ]
+    return rangeV
+
+
+def drawChart(data, st):
+    if len(data) == 0:
+        return
+
+    x = "latency_p99"
+    xrange = getRange(x, data, [0.05, 0.1])
+
+    y = "qps"
+    yrange = getRange(y, data, [0.2, 0.1])
+
+    color = "db"
+    color_discrete_map = COLOR_MAP
+    color = "db_name"
+    color_discrete_map = None
+    line_group = "db_name"
+    text = "conc_num"
+
+    data.sort(key=lambda a: a["conc_num"])
+
+    fig = px.line(
+        data,
+        x=x,
+        y=y,
+        color=color,
+        color_discrete_map=color_discrete_map,
+        line_group=line_group,
+        text=text,
+        markers=True,
+        # color_discrete_map=color_discrete_map,
+        hover_data={
+            "conc_num": True,
+        },
+        height=720,
+    )
+    fig.update_xaxes(range=xrange, title_text="Latency P99 (ms)")
+    fig.update_yaxes(range=yrange, title_text="QPS")
+    fig.update_traces(textposition="bottom right",
+                      texttemplate="conc-%{text:,.4~r}")
+    # fig.update_layout(
+    #     margin=dict(l=0, r=0, t=40, b=0, pad=8),
+    #     legend=dict(
+    #         orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""
+    #     ),
+    # )
+    st.plotly_chart(fig, use_container_width=True,)
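`drawChart` expects one row per database and concurrency level, with `latency_p99` already converted to milliseconds by `drawChartsByCase`. An illustrative example of that row shape (the numbers are placeholders, not benchmark results):

```python
rows = [
    {"conc_num": 1,  "qps": 120.0,  "latency_p99": 8.5,  "db_name": "Milvus-test", "db": "Milvus"},
    {"conc_num": 8,  "qps": 640.0,  "latency_p99": 12.3, "db_name": "Milvus-test", "db": "Milvus"},
    {"conc_num": 32, "qps": 1150.0, "latency_p99": 27.9, "db_name": "Milvus-test", "db": "Milvus"},
]
# Inside a Streamlit page: drawChart(rows, st.container())
```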
--- a/vectordb_bench/frontend/components/run_test/dbSelector.py
+++ b/vectordb_bench/frontend/components/run_test/dbSelector.py
@@ -1,3 +1,5 @@
+from streamlit.runtime.media_file_storage import MediaFileStorageError
+
 from vectordb_bench.frontend.const.styles import *
 from vectordb_bench.frontend.const.dbCaseConfigs import DB_LIST
 
@@ -30,7 +32,11 @@ def dbSelector(st):
     for i, db in enumerate(DB_LIST):
         column = dbContainerColumns[i % DB_SELECTOR_COLUMNS]
         dbIsActived[db] = column.checkbox(db.name)
-        column.image(DB_TO_ICON.get(db, ""))
+        try:
+            column.image(DB_TO_ICON.get(db, ""))
+        except MediaFileStorageError as e:
+            column.warning(f"{db.name} image not available")
+            pass
     activedDbList = [db for db in DB_LIST if dbIsActived[db]]
 
     return activedDbList
--- a/vectordb_bench/frontend/components/run_test/submitTask.py
+++ b/vectordb_bench/frontend/components/run_test/submitTask.py
@@ -37,22 +37,30 @@ def taskLabelInput(st):
 def advancedSettings(st):
     container = st.columns([1, 2])
     index_already_exists = container[0].checkbox("Index already exists", value=False)
-    container[1].caption("if
+    container[1].caption("if selected, inserting and building will be skipped.")
 
     container = st.columns([1, 2])
     use_aliyun = container[0].checkbox("Dataset from Aliyun (Shanghai)", value=False)
     container[1].caption(
-        "if
+        "if selected, the dataset will be downloaded from Aliyun OSS shanghai, default AWS S3 aws-us-west."
     )
 
-    return index_already_exists, use_aliyun
+    container = st.columns([1, 2])
+    k = container[0].number_input("k",min_value=1, value=100, label_visibility="collapsed")
+    container[1].caption(
+        "K value for number of nearest neighbors to search"
+    )
+
+    return index_already_exists, use_aliyun, k
 
 
 def controlPanel(st, tasks, taskLabel, isAllValid):
-    index_already_exists, use_aliyun = advancedSettings(st)
+    index_already_exists, use_aliyun, k = advancedSettings(st)
 
     def runHandler():
         benchMarkRunner.set_drop_old(not index_already_exists)
+        for task in tasks:
+            task.case_config.k = k
         benchMarkRunner.set_download_address(use_aliyun)
         benchMarkRunner.run(tasks, taskLabel)
 
--- /dev/null
+++ b/vectordb_bench/frontend/components/tables/data.py
@@ -0,0 +1,44 @@
+from dataclasses import asdict
+from vectordb_bench.backend.cases import CaseType
+from vectordb_bench.interface import benchMarkRunner
+from vectordb_bench.models import CaseResult, ResultLabel
+import pandas as pd
+
+
+def getNewResults():
+    allResults = benchMarkRunner.get_results()
+    newResults: list[CaseResult] = []
+
+    for res in allResults:
+        results = res.results
+        for result in results:
+            if result.label == ResultLabel.NORMAL:
+                newResults.append(result)
+
+
+    df = pd.DataFrame(formatData(newResults))
+    return df
+
+
+def formatData(caseResults: list[CaseResult]):
+    data = []
+    for caseResult in caseResults:
+        db = caseResult.task_config.db.value
+        db_label = caseResult.task_config.db_config.db_label
+        case_config = caseResult.task_config.case_config
+        db_case_config = caseResult.task_config.db_case_config
+        case = case_config.case_id.case_cls()
+        filter_rate = case.filter_rate
+        dataset = case.dataset.data.name
+        metrics = asdict(caseResult.metrics)
+        data.append(
+            {
+                "db": db,
+                "db_label": db_label,
+                "case_name": case.name,
+                "dataset": dataset,
+                "filter_rate": filter_rate,
+                **metrics,
+            }
+        )
+    return data
--- a/vectordb_bench/frontend/const/dbCaseConfigs.py
+++ b/vectordb_bench/frontend/const/dbCaseConfigs.py
@@ -9,7 +9,7 @@ from vectordb_bench.models import CaseConfigParamType
 
 MAX_STREAMLIT_INT = (1 << 53) - 1
 
-DB_LIST = [d for d in DB]
+DB_LIST = [d for d in DB if d != DB.Test]
 
 DIVIDER = "DIVIDER"
 CASE_LIST_WITH_DIVIDER = [
@@ -19,6 +19,7 @@ CASE_LIST_WITH_DIVIDER = [
     DIVIDER,
     CaseType.Performance1536D5M,
     CaseType.Performance1536D500K,
+    CaseType.Performance1536D50K,
     DIVIDER,
     CaseType.Performance768D10M1P,
     CaseType.Performance768D1M1P,
--- /dev/null
+++ b/vectordb_bench/frontend/pages/concurrent.py
@@ -0,0 +1,72 @@
+
+
+
+import streamlit as st
+from vectordb_bench.backend.cases import CaseType
+from vectordb_bench.frontend.components.check_results.footer import footer
+from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
+from vectordb_bench.frontend.components.check_results.priceTable import priceTable
+from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
+from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToRunTest
+from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
+from vectordb_bench.frontend.components.check_results.filters import getshownData
+from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
+from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
+from vectordb_bench.frontend.const.styles import *
+from vectordb_bench.interface import benchMarkRunner
+from vectordb_bench.models import TestResult
+
+
+def main():
+    # set page config
+    st.set_page_config(
+        page_title="VDBBench Conc Perf",
+        page_icon=FAVICON,
+        layout="wide",
+        # initial_sidebar_state="collapsed",
+    )
+
+    # header
+    drawHeaderIcon(st)
+
+    allResults = benchMarkRunner.get_results()
+
+    def check_conc_data(res: TestResult):
+        case_results = res.results
+        count = 0
+        for case_result in case_results:
+            if len(case_result.metrics.conc_num_list) > 0:
+                count += 1
+
+        return count > 0
+
+    checkedResults = [res for res in allResults if check_conc_data(res)]
+
+
+    st.title("VectorDB Benchmark (Concurrent Performance)")
+
+    # results selector
+    resultSelectorContainer = st.sidebar.container()
+    shownData, _, showCases = getshownData(
+        checkedResults, resultSelectorContainer)
+
+
+    resultSelectorContainer.divider()
+
+    # nav
+    navContainer = st.sidebar.container()
+    NavToRunTest(navContainer)
+    NavToResults(navContainer)
+
+    # save or share
+    resultesContainer = st.sidebar.container()
+    getResults(resultesContainer, "vectordb_bench_concurrent")
+
+    drawChartsByCase(shownData, showCases, st.container())
+
+    # footer
+    footer(st.container())
+
+
+if __name__ == "__main__":
+    main()