vectordb-bench 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. vectordb_bench/__init__.py +22 -9
  2. vectordb_bench/backend/cases.py +32 -12
  3. vectordb_bench/backend/clients/__init__.py +1 -0
  4. vectordb_bench/backend/clients/api.py +1 -1
  5. vectordb_bench/backend/clients/milvus/cli.py +291 -0
  6. vectordb_bench/backend/clients/milvus/config.py +6 -2
  7. vectordb_bench/backend/clients/milvus/milvus.py +16 -5
  8. vectordb_bench/backend/clients/pgvector/cli.py +116 -0
  9. vectordb_bench/backend/clients/pgvector/config.py +1 -1
  10. vectordb_bench/backend/clients/pgvector/pgvector.py +7 -4
  11. vectordb_bench/backend/clients/redis/cli.py +74 -0
  12. vectordb_bench/backend/clients/test/cli.py +25 -0
  13. vectordb_bench/backend/clients/test/config.py +18 -0
  14. vectordb_bench/backend/clients/test/test.py +62 -0
  15. vectordb_bench/backend/clients/weaviate_cloud/cli.py +41 -0
  16. vectordb_bench/backend/clients/zilliz_cloud/cli.py +55 -0
  17. vectordb_bench/backend/runner/mp_runner.py +14 -3
  18. vectordb_bench/backend/runner/serial_runner.py +7 -3
  19. vectordb_bench/backend/task_runner.py +76 -26
  20. vectordb_bench/cli/__init__.py +0 -0
  21. vectordb_bench/cli/cli.py +362 -0
  22. vectordb_bench/cli/vectordbbench.py +20 -0
  23. vectordb_bench/config-files/sample_config.yml +17 -0
  24. vectordb_bench/frontend/components/check_results/data.py +11 -8
  25. vectordb_bench/frontend/components/concurrent/charts.py +82 -0
  26. vectordb_bench/frontend/components/run_test/dbSelector.py +7 -1
  27. vectordb_bench/frontend/components/run_test/submitTask.py +12 -4
  28. vectordb_bench/frontend/components/tables/data.py +44 -0
  29. vectordb_bench/frontend/const/dbCaseConfigs.py +2 -1
  30. vectordb_bench/frontend/pages/concurrent.py +72 -0
  31. vectordb_bench/frontend/pages/tables.py +24 -0
  32. vectordb_bench/interface.py +21 -25
  33. vectordb_bench/metric.py +23 -1
  34. vectordb_bench/models.py +45 -5
  35. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/METADATA +193 -2
  36. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/RECORD +40 -24
  37. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/WHEEL +1 -1
  38. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/entry_points.txt +1 -0
  39. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/LICENSE +0 -0
  40. {vectordb_bench-0.0.9.dist-info → vectordb_bench-0.0.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,362 @@
1
+ import logging
2
+ import time
3
+ from concurrent.futures import wait
4
+ from datetime import datetime
5
+ from pprint import pformat
6
+ from typing import (
7
+ Annotated,
8
+ Callable,
9
+ List,
10
+ Optional,
11
+ Type,
12
+ TypedDict,
13
+ Unpack,
14
+ get_origin,
15
+ get_type_hints,
16
+ Dict,
17
+ Any,
18
+ )
19
+ import click
20
+ from .. import config
21
+ from ..backend.clients import DB
22
+ from ..interface import benchMarkRunner, global_result_future
23
+ from ..models import (
24
+ CaseConfig,
25
+ CaseType,
26
+ ConcurrencySearchConfig,
27
+ DBCaseConfig,
28
+ DBConfig,
29
+ TaskConfig,
30
+ TaskStage,
31
+ )
32
+ import os
33
+ from yaml import load
34
+ try:
35
+ from yaml import CLoader as Loader
36
+ except ImportError:
37
+ from yaml import Loader
38
+
39
+
40
def click_get_defaults_from_file(ctx, param, value):
    """Click eager callback: preload option defaults from a YAML config file.

    The given path is tried as-is first, then relative to
    ``config.CONFIG_LOCAL_DIR``.  The YAML document maps command names to
    option dicts; the entry for the current command is installed as
    ``ctx.default_map`` so later-parsed options pick up those defaults.

    Args:
        ctx: click context of the command being invoked.
        param: the click option object (unused, required by callback API).
        value: path of the YAML file, or a falsy value to skip loading.

    Returns:
        The original *value*, as click callbacks must.

    Raises:
        click.BadParameter: when the file cannot be read or parsed.
    """
    if value:
        # Accept an absolute/relative path as-is; otherwise fall back to the
        # well-known local config directory.
        if os.path.exists(value):
            input_file = value
        else:
            input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
        try:
            with open(input_file, 'r') as f:
                _config: Dict[str, Dict[str, Any]] = load(f.read(), Loader=Loader)
                ctx.default_map = _config.get(ctx.command.name, {})
        except Exception as e:
            # Chain the original exception so the root cause stays visible
            # in the traceback instead of being flattened into the message.
            raise click.BadParameter(f"Failed to load config file: {e}") from e
    return value
53
+
54
+
55
def click_parameter_decorators_from_typed_dict(
    typed_dict: Type,
) -> Callable[[click.decorators.FC], click.decorators.FC]:
    """Compose click decorators declared on a TypedDict into one decorator.

    Reads a TypedDict whose keys are ``Annotated[<type>, <click decorator>]``
    entries; the collected ``click.option``/``click.argument`` decorators are
    re-composed into a single decorator to apply to the click.command.

    Args:
        typed_dict: TypedDict with ``Annotated[..., click.option(...)]`` keys.

    Returns:
        A decorator applying every collected click decorator to the target.

    Raises:
        RuntimeError: when an annotation carries anything other than exactly
            one click decorator as its metadata.

    For clarity, the key names of the TypedDict only determine the type hints
    for the input parameters.  The actual function parameters are controlled
    by the click.option definitions.  You must manually ensure these are
    aligned in a sensible way!

    Example:
        ```
        class CommonTypedDict(TypedDict):
            z: Annotated[bool, click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)]
            name: Annotated[str, click.argument("name", required=False, default="Jeff")]

        class FooTypedDict(CommonTypedDict):
            x: Annotated[int, click.option("--x", type=int, help="help x", default=1, show_default=True)]
            y: Annotated[str, click.option("--y", type=str, help="help y", default="foo", show_default=True)]

        @cli.command()
        @click_parameter_decorators_from_typed_dict(FooTypedDict)
        def foo(**parameters: Unpack[FooTypedDict]):
            "Foo docstring"
            print(f"input parameters: {parameters['x']}")
        ```
    """
    decorators = []
    for _, t in get_type_hints(typed_dict, include_extras=True).items():
        assert get_origin(t) is Annotated
        if (
            len(t.__metadata__) == 1
            and t.__metadata__[0].__module__ == "click.decorators"
        ):
            # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]] with no additional metadata defined (len=1)
            decorators.append(t.__metadata__[0])
        else:
            raise RuntimeError(
                "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring"
            )

    def deco(f):
        # Apply in reverse so the composed result is equivalent to stacking
        # the decorators in their declaration order.
        for dec in reversed(decorators):
            f = dec(f)
        return f

    return deco
109
+
110
+
111
def click_arg_split(ctx: "click.Context", param: "click.core.Option", value):
    """Split a comma-separated option string into a list of tokens.

    Args:
        ctx: unused click arg.
        param: unused click arg.
        value: input comma-separated list, or None.

    Returns:
        List of the non-empty items, each stripped of surrounding
        whitespace; an empty list when *value* is None.
    """
    if value is None:
        return []
    # split on ',' and drop whitespace-only pieces
    pieces = (piece.strip() for piece in value.split(","))
    return [piece for piece in pieces if piece]
126
+
127
+
128
def parse_task_stages(
    drop_old: bool,
    load: bool,
    search_serial: bool,
    search_concurrent: bool,
) -> List[TaskStage]:
    """Translate the four stage flags into an ordered list of TaskStages.

    drop_old and load must be enabled or disabled together; enabling only
    one of them is rejected.

    Raises:
        RuntimeError: when exactly one of drop_old / load is enabled.
    """
    if load and not drop_old:
        raise RuntimeError("Dropping old data cannot be skipped if loading data")
    if drop_old and not load:
        raise RuntimeError("Load cannot be skipped if dropping old data")
    # Fixed pipeline order: drop, load, serial search, concurrent search.
    flag_stage_pairs = (
        (drop_old, TaskStage.DROP_OLD),
        (load, TaskStage.LOAD),
        (search_serial, TaskStage.SEARCH_SERIAL),
        (search_concurrent, TaskStage.SEARCH_CONCURRENT),
    )
    return [stage for enabled, stage in flag_stage_pairs if enabled]
148
+
149
+
150
+ log = logging.getLogger(__name__)
151
+
152
+
153
class CommonTypedDict(TypedDict):
    # CLI options shared by every database subcommand.  Each key is
    # Annotated[<hint>, <click decorator>]; the decorators are collected and
    # applied by click_parameter_decorators_from_typed_dict.
    config_file: Annotated[
        bool,
        # is_eager + expose_value=False: parsed before the other options so it
        # can fill ctx.default_map, and not passed to the command function.
        click.option('--config-file',
                     type=click.Path(),
                     callback=click_get_defaults_from_file,
                     is_eager=True,
                     expose_value=False,
                     help='Read configuration from yaml file'),
    ]
    drop_old: Annotated[
        bool,
        click.option(
            "--drop-old/--skip-drop-old",
            type=bool,
            default=True,
            help="Drop old or skip",
            show_default=True,
        ),
    ]
    load: Annotated[
        bool,
        click.option(
            "--load/--skip-load",
            type=bool,
            default=True,
            help="Load or skip",
            show_default=True,
        ),
    ]
    search_serial: Annotated[
        bool,
        click.option(
            "--search-serial/--skip-search-serial",
            type=bool,
            default=True,
            help="Search serial or skip",
            show_default=True,
        ),
    ]
    search_concurrent: Annotated[
        bool,
        click.option(
            "--search-concurrent/--skip-search-concurrent",
            type=bool,
            default=True,
            help="Search concurrent or skip",
            show_default=True,
        ),
    ]
    case_type: Annotated[
        str,
        click.option(
            "--case-type",
            # every built-in case except the user-defined "Custom" one
            type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
            default="Performance1536D50K",
            help="Case type",
        ),
    ]
    db_label: Annotated[
        str,
        click.option(
            "--db-label", type=str, help="Db label, default: date in ISO format",
            show_default=True,
            # NOTE(review): this default is computed once at import time, so
            # every invocation in the same process shares one timestamp --
            # confirm that is intended.
            default=datetime.now().isoformat()
        ),
    ]
    dry_run: Annotated[
        bool,
        click.option(
            "--dry-run",
            type=bool,
            default=False,
            is_flag=True,
            help="Print just the configuration and exit without running the tasks",
        ),
    ]
    k: Annotated[
        int,
        click.option(
            "--k",
            type=int,
            default=config.K_DEFAULT,
            show_default=True,
            help="K value for number of nearest neighbors to search",
        ),
    ]
    concurrency_duration: Annotated[
        int,
        click.option(
            "--concurrency-duration",
            type=int,
            default=config.CONCURRENCY_DURATION,
            show_default=True,
            help="Adjusts the duration in seconds of each concurrency search",
        ),
    ]
    num_concurrency: Annotated[
        List[str],
        click.option(
            "--num-concurrency",
            type=str,
            help="Comma-separated list of concurrency values to test during concurrent search",
            show_default=True,
            default=",".join(map(str, config.NUM_CONCURRENCY)),
            # callback turns the string "1,5,10" into the list [1, 5, 10]
            callback=lambda *args: list(map(int, click_arg_split(*args))),
        ),
    ]
261
+
262
+
263
class HNSWBaseTypedDict(TypedDict):
    # Optional HNSW build parameters shared by the HNSW option flavors below.
    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m")]
    ef_construction: Annotated[
        Optional[int],
        click.option("--ef-construction", type=int, help="hnsw ef-construction"),
    ]
269
+
270
+
271
class HNSWBaseRequiredTypedDict(TypedDict):
    # Same HNSW build parameters as HNSWBaseTypedDict, but mandatory on the CLI.
    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m", required=True)]
    ef_construction: Annotated[
        Optional[int],
        click.option("--ef-construction", type=int, help="hnsw ef-construction", required=True),
    ]
277
+
278
+
279
class HNSWFlavor1(HNSWBaseTypedDict):
    # HNSW options plus an optional search-time parameter spelled "ef-search".
    ef_search: Annotated[
        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search")
    ]
283
+
284
+
285
class HNSWFlavor2(HNSWBaseTypedDict):
    # HNSW options plus an optional search-time parameter spelled "ef-runtime"
    # (the naming some backends use instead of ef-search).
    ef_runtime: Annotated[
        Optional[int], click.option("--ef-runtime", type=int, help="hnsw ef-runtime")
    ]
289
+
290
+
291
class HNSWFlavor3(HNSWBaseRequiredTypedDict):
    # Fully-required HNSW options: build parameters and ef-search must all be given.
    ef_search: Annotated[
        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search", required=True)
    ]
295
+
296
+
297
class IVFFlatTypedDict(TypedDict):
    # Optional IVF-Flat parameters: lists (build) and probes (search).
    lists: Annotated[
        Optional[int], click.option("--lists", type=int, help="ivfflat lists")
    ]
    probes: Annotated[
        Optional[int], click.option("--probes", type=int, help="ivfflat probes")
    ]
304
+
305
+
306
class IVFFlatTypedDictN(TypedDict):
    # Required IVF-Flat parameters.  The flags keep the --lists/--probes
    # spelling but bind to the nlist/nprobe parameter names used by some
    # backends' configs.
    nlist: Annotated[
        Optional[int], click.option("--lists", "nlist", type=int, help="ivfflat lists", required=True)
    ]
    nprobe: Annotated[
        Optional[int], click.option("--probes", "nprobe", type=int, help="ivfflat probes", required=True)
    ]
313
+
314
+
315
@click.group()
def cli():
    # Root click group; the per-database subcommands are attached to it
    # via cli.add_command(...) (see vectordbbench.py).
    ...
318
+
319
+
320
def run(
    db: DB,
    db_config: DBConfig,
    db_case_config: DBCaseConfig,
    **parameters: Unpack[CommonTypedDict],
):
    """Builds a single VectorDBBench Task and runs it, awaiting the task until finished.

    Args:
        db (DB): database enum member the task targets
        db_config (DBConfig): connection configuration for that database
        db_case_config (DBCaseConfig): index/search parameters for the case
        **parameters: expects keys from CommonTypedDict
    """

    task = TaskConfig(
        db=db,
        db_config=db_config,
        db_case_config=db_case_config,
        case_config=CaseConfig(
            case_id=CaseType[parameters["case_type"]],
            k=parameters["k"],
            concurrency_search_config=ConcurrencySearchConfig(
                concurrency_duration=parameters["concurrency_duration"],
                num_concurrency=[int(s) for s in parameters["num_concurrency"]],
            ),
        ),
        stages=parse_task_stages(
            (
                False if not parameters["load"] else parameters["drop_old"]
            ),  # only drop old data if loading new data
            parameters["load"],
            parameters["search_serial"],
            parameters["search_concurrent"],
        ),
    )

    log.info(f"Task:\n{pformat(task)}\n")
    if not parameters["dry_run"]:
        benchMarkRunner.run([task])
        # Give the runner a moment to start and publish its result future
        # before we look for it -- TODO confirm 5s is enough on slow hosts.
        time.sleep(5)
        # BUGFIX: re-import here to read the *current* value of
        # interface.global_result_future.  The module-level
        # `from ..interface import global_result_future` snapshots the value
        # at import time (before any run has started), so the old check
        # could never observe a future created by benchMarkRunner.run.
        from ..interface import global_result_future
        if global_result_future:
            wait([global_result_future])
@@ -0,0 +1,20 @@
1
# Entry point for the `vectordbbench` console script: import each client's
# click command and register it on the root `cli` group.
from ..backend.clients.pgvector.cli import PgVectorHNSW
from ..backend.clients.redis.cli import Redis
from ..backend.clients.test.cli import Test
from ..backend.clients.weaviate_cloud.cli import Weaviate
from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
from ..backend.clients.milvus.cli import MilvusAutoIndex


from .cli import cli

# One subcommand per supported database / index flavor.
cli.add_command(PgVectorHNSW)
cli.add_command(Redis)
cli.add_command(Weaviate)
cli.add_command(Test)
cli.add_command(ZillizAutoIndex)
cli.add_command(MilvusAutoIndex)


if __name__ == "__main__":
    cli()
@@ -0,0 +1,17 @@
1
# Sample input for --config-file.  Top-level keys are vectordbbench
# subcommand names; the nested keys become that command's option defaults
# (loaded into click's default_map, so command-line flags still win).
pgvectorhnsw:
  db_label: pgConfigTest
  user_name: vectordbbench
  db_name: vectordbbench
  host: localhost
  m: 16
  ef_construction: 128
  ef_search: 128
milvushnsw:
  skip_search_serial: True
  case_type: Performance1536D50K
  uri: http://localhost:19530
  m: 16
  ef_construction: 128
  ef_search: 128
  drop_old: False
  load: False
@@ -87,15 +87,18 @@ def mergeMetrics(metrics_1: dict, metrics_2: dict) -> dict:
87
87
 
88
88
 
89
89
def getBetterMetric(metric, value_1, value_2):
    """Pick the better of two values for *metric*.

    Values below 1e-7 are treated as missing and the other value is
    returned.  If the comparison fails for any reason, value_1 wins.
    """
    try:
        epsilon = 1e-7
        if value_1 < epsilon:
            return value_2
        if value_2 < epsilon:
            return value_1
        # direction of "better" depends on the metric
        chooser = min if isLowerIsBetterMetric(metric) else max
        return chooser(value_1, value_2)
    except Exception:
        return value_1
99
102
 
100
103
 
101
104
  def getBetterLabel(label_1: ResultLabel, label_2: ResultLabel):
@@ -0,0 +1,82 @@
1
+
2
+
3
+ from vectordb_bench.backend.cases import Case
4
+ from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
5
+ import plotly.express as px
6
+
7
+ from vectordb_bench.frontend.const.styles import COLOR_MAP
8
+
9
+
10
def drawChartsByCase(allData, cases: list[Case], st):
    # One expander per case, each holding a QPS-vs-latency line chart of the
    # concurrency sweep for every result belonging to that case.
    initMainExpanderStyle(st)
    for case in cases:
        chartContainer = st.expander(case.name, True)
        caseDataList = [
            data for data in allData if data["case_name"] == case.name]
        # Flatten the per-result parallel lists into one row per
        # (result, concurrency) pair.  latency_p99 is scaled by 1000 --
        # presumably seconds to ms, matching drawChart's axis label.
        data = [{
            "conc_num": caseData["conc_num_list"][i],
            "qps": caseData["conc_qps_list"][i],
            "latency_p99": caseData["conc_latency_p99_list"][i] * 1000,
            "db_name": caseData["db_name"],
            "db": caseData["db"]

        } for caseData in caseDataList for i in range(len(caseData["conc_num_list"]))]
        drawChart(data, chartContainer)
25
+
26
+
27
def getRange(metric, data, padding_multipliers):
    """Compute a padded [low, high] axis range for *metric* over *data*.

    Rows missing the metric contribute 0.  Padding is a fraction of the
    value span: padding_multipliers[0] below the minimum and
    padding_multipliers[1] above the maximum.
    """
    values = [row.get(metric, 0) for row in data]
    low = min(values)
    high = max(values)
    span = high - low
    return [
        low - span * padding_multipliers[0],
        high + span * padding_multipliers[1],
    ]
36
+
37
+
38
def drawChart(data, st):
    """Render one QPS-vs-latency line chart for a case's concurrency sweep.

    Args:
        data: rows with conc_num / qps / latency_p99 / db / db_name keys
            (as built by drawChartsByCase); mutated in place by the sort.
        st: streamlit container (expander) to draw into.
    """
    if len(data) == 0:
        return

    x = "latency_p99"
    xrange = getRange(x, data, [0.05, 0.1])

    y = "qps"
    yrange = getRange(y, data, [0.2, 0.1])

    # One line per result, colored and labelled by db_name.  (An earlier
    # variant colored by "db" with COLOR_MAP; those assignments were dead
    # code -- immediately overwritten -- and have been removed.)
    color = "db_name"
    color_discrete_map = None
    line_group = "db_name"
    text = "conc_num"

    # Plot points in increasing concurrency so each line walks the sweep.
    data.sort(key=lambda a: a["conc_num"])

    fig = px.line(
        data,
        x=x,
        y=y,
        color=color,
        color_discrete_map=color_discrete_map,
        line_group=line_group,
        text=text,
        markers=True,
        hover_data={
            "conc_num": True,
        },
        height=720,
    )
    fig.update_xaxes(range=xrange, title_text="Latency P99 (ms)")
    fig.update_yaxes(range=yrange, title_text="QPS")
    # label each marker with its concurrency level
    fig.update_traces(textposition="bottom right",
                      texttemplate="conc-%{text:,.4~r}")
    st.plotly_chart(fig, use_container_width=True,)
@@ -1,3 +1,5 @@
1
+ from streamlit.runtime.media_file_storage import MediaFileStorageError
2
+
1
3
  from vectordb_bench.frontend.const.styles import *
2
4
  from vectordb_bench.frontend.const.dbCaseConfigs import DB_LIST
3
5
 
@@ -30,7 +32,11 @@ def dbSelector(st):
30
32
  for i, db in enumerate(DB_LIST):
31
33
  column = dbContainerColumns[i % DB_SELECTOR_COLUMNS]
32
34
  dbIsActived[db] = column.checkbox(db.name)
33
- column.image(DB_TO_ICON.get(db, ""))
35
+ try:
36
+ column.image(DB_TO_ICON.get(db, ""))
37
+ except MediaFileStorageError as e:
38
+ column.warning(f"{db.name} image not available")
39
+ pass
34
40
  activedDbList = [db for db in DB_LIST if dbIsActived[db]]
35
41
 
36
42
  return activedDbList
@@ -37,22 +37,30 @@ def taskLabelInput(st):
37
37
def advancedSettings(st):
    """Render the advanced-settings rows of the run-test panel.

    Returns:
        (index_already_exists, use_aliyun, k) from the three widgets.
    """

    def _row(widget_factory, caption_text):
        # Two-column row: widget on the narrow left, caption on the right.
        left, right = st.columns([1, 2])
        value = widget_factory(left)
        right.caption(caption_text)
        return value

    index_already_exists = _row(
        lambda col: col.checkbox("Index already exists", value=False),
        "if selected, inserting and building will be skipped.",
    )
    use_aliyun = _row(
        lambda col: col.checkbox("Dataset from Aliyun (Shanghai)", value=False),
        "if selected, the dataset will be downloaded from Aliyun OSS shanghai, default AWS S3 aws-us-west.",
    )
    k = _row(
        lambda col: col.number_input("k", min_value=1, value=100, label_visibility="collapsed"),
        "K value for number of nearest neighbors to search",
    )

    return index_already_exists, use_aliyun, k
49
55
 
50
56
 
51
57
  def controlPanel(st, tasks, taskLabel, isAllValid):
52
- index_already_exists, use_aliyun = advancedSettings(st)
58
+ index_already_exists, use_aliyun, k = advancedSettings(st)
53
59
 
54
60
  def runHandler():
55
61
  benchMarkRunner.set_drop_old(not index_already_exists)
62
+ for task in tasks:
63
+ task.case_config.k = k
56
64
  benchMarkRunner.set_download_address(use_aliyun)
57
65
  benchMarkRunner.run(tasks, taskLabel)
58
66
 
@@ -0,0 +1,44 @@
1
+ from dataclasses import asdict
2
+ from vectordb_bench.backend.cases import CaseType
3
+ from vectordb_bench.interface import benchMarkRunner
4
+ from vectordb_bench.models import CaseResult, ResultLabel
5
+ import pandas as pd
6
+
7
+
8
def getNewResults():
    """Collect every CaseResult labelled NORMAL into a flat DataFrame."""
    successful: list[CaseResult] = [
        caseResult
        for testResult in benchMarkRunner.get_results()
        for caseResult in testResult.results
        if caseResult.label == ResultLabel.NORMAL
    ]
    return pd.DataFrame(formatData(successful))
21
+
22
+
23
def formatData(caseResults: list[CaseResult]):
    """Flatten CaseResults into plain dict rows (one per result).

    Each row carries the db / case identity columns plus every field of the
    result's metrics dataclass.  (The unused db_case_config local from the
    original was dropped.)
    """
    data = []
    for caseResult in caseResults:
        task_config = caseResult.task_config
        # Instantiate the case class to read its display name, dataset and
        # filter rate.
        case = task_config.case_config.case_id.case_cls()
        data.append(
            {
                "db": task_config.db.value,
                "db_label": task_config.db_config.db_label,
                "case_name": case.name,
                "dataset": case.dataset.data.name,
                "filter_rate": case.filter_rate,
                **asdict(caseResult.metrics),
            }
        )
    return data
@@ -9,7 +9,7 @@ from vectordb_bench.models import CaseConfigParamType
9
9
 
10
10
  MAX_STREAMLIT_INT = (1 << 53) - 1
11
11
 
12
- DB_LIST = [d for d in DB]
12
+ DB_LIST = [d for d in DB if d != DB.Test]
13
13
 
14
14
  DIVIDER = "DIVIDER"
15
15
  CASE_LIST_WITH_DIVIDER = [
@@ -19,6 +19,7 @@ CASE_LIST_WITH_DIVIDER = [
19
19
  DIVIDER,
20
20
  CaseType.Performance1536D5M,
21
21
  CaseType.Performance1536D500K,
22
+ CaseType.Performance1536D50K,
22
23
  DIVIDER,
23
24
  CaseType.Performance768D10M1P,
24
25
  CaseType.Performance768D1M1P,
@@ -0,0 +1,72 @@
1
+
2
+
3
+
4
+ import streamlit as st
5
+ from vectordb_bench.backend.cases import CaseType
6
+ from vectordb_bench.frontend.components.check_results.footer import footer
7
+ from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
8
+ from vectordb_bench.frontend.components.check_results.priceTable import priceTable
9
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
10
+ from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToRunTest
11
+ from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
12
+ from vectordb_bench.frontend.components.check_results.filters import getshownData
13
+ from vectordb_bench.frontend.components.concurrent.charts import drawChartsByCase
14
+ from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
15
+ from vectordb_bench.frontend.const.styles import *
16
+ from vectordb_bench.interface import benchMarkRunner
17
+ from vectordb_bench.models import TestResult
18
+
19
+
20
def main():
    # Streamlit page showing concurrent-performance results: QPS vs latency
    # per concurrency level, one chart per case.

    # set page config
    st.set_page_config(
        page_title="VDBBench Conc Perf",
        page_icon=FAVICON,
        layout="wide",
        # initial_sidebar_state="collapsed",
    )

    # header
    drawHeaderIcon(st)

    allResults = benchMarkRunner.get_results()

    def check_conc_data(res: TestResult):
        # True when at least one case in this result carries concurrency
        # metrics (non-empty conc_num_list).
        case_results = res.results
        count = 0
        for case_result in case_results:
            if len(case_result.metrics.conc_num_list) > 0:
                count += 1

        return count > 0

    # only results with concurrency data are shown on this page
    checkedResults = [res for res in allResults if check_conc_data(res)]


    st.title("VectorDB Benchmark (Concurrent Performance)")

    # results selector
    resultSelectorContainer = st.sidebar.container()
    shownData, _, showCases = getshownData(
        checkedResults, resultSelectorContainer)


    resultSelectorContainer.divider()

    # nav
    navContainer = st.sidebar.container()
    NavToRunTest(navContainer)
    NavToResults(navContainer)

    # save or share
    resultesContainer = st.sidebar.container()
    getResults(resultesContainer, "vectordb_bench_concurrent")

    drawChartsByCase(shownData, showCases, st.container())

    # footer
    footer(st.container())
69
+
70
+
71
+ if __name__ == "__main__":
72
+ main()