vectordb-bench 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. vectordb_bench/__init__.py +14 -13
  2. vectordb_bench/backend/clients/__init__.py +13 -0
  3. vectordb_bench/backend/clients/api.py +2 -0
  4. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
  5. vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
  6. vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
  7. vectordb_bench/backend/clients/pgdiskann/cli.py +99 -0
  8. vectordb_bench/backend/clients/pgdiskann/config.py +145 -0
  9. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +350 -0
  10. vectordb_bench/backend/clients/pgvector/cli.py +62 -1
  11. vectordb_bench/backend/clients/pgvector/config.py +48 -10
  12. vectordb_bench/backend/clients/pgvector/pgvector.py +145 -26
  13. vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
  14. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +22 -4
  15. vectordb_bench/backend/clients/pinecone/config.py +0 -2
  16. vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
  17. vectordb_bench/backend/clients/redis/cli.py +8 -0
  18. vectordb_bench/backend/clients/redis/config.py +37 -6
  19. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
  20. vectordb_bench/backend/runner/mp_runner.py +2 -1
  21. vectordb_bench/cli/cli.py +137 -0
  22. vectordb_bench/cli/vectordbbench.py +4 -1
  23. vectordb_bench/frontend/components/check_results/charts.py +9 -6
  24. vectordb_bench/frontend/components/concurrent/charts.py +3 -6
  25. vectordb_bench/frontend/components/run_test/caseSelector.py +6 -0
  26. vectordb_bench/frontend/config/dbCaseConfigs.py +165 -1
  27. vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
  28. vectordb_bench/frontend/vdb_benchmark.py +11 -3
  29. vectordb_bench/models.py +13 -3
  30. vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
  31. vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
  32. vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
  33. vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
  34. vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
  35. vectordb_bench/results/getLeaderboardData.py +17 -7
  36. vectordb_bench/results/leaderboard.json +1 -1
  37. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/METADATA +65 -35
  38. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/RECORD +42 -38
  39. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/WHEEL +1 -1
  40. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/LICENSE +0 -0
  41. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/entry_points.txt +0 -0
  42. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.15.dist-info}/top_level.txt +0 -0
vectordb_bench/cli/cli.py CHANGED
@@ -17,6 +17,8 @@ from typing import (
17
17
  Any,
18
18
  )
19
19
  import click
20
+
21
+ from vectordb_bench.backend.clients.api import MetricType
20
22
  from .. import config
21
23
  from ..backend.clients import DB
22
24
  from ..interface import benchMarkRunner, global_result_future
@@ -147,6 +149,37 @@ def parse_task_stages(
147
149
  return stages
148
150
 
149
151
 
152
+ def check_custom_case_parameters(ctx, param, value):
153
+ if ctx.params.get("case_type") == "PerformanceCustomDataset":
154
+ if value is None:
155
+ raise click.BadParameter("Custom case parameters\
156
+ \n--custom-case-name\n--custom-dataset-name\n--custom-dataset-dir\n--custom-dataset-size \
157
+ \n--custom-dataset-dim\n--custom-dataset-file-count\n are required")
158
+ return value
159
+
160
+
161
+ def get_custom_case_config(parameters: dict) -> dict:
162
+ custom_case_config = {}
163
+ if parameters["case_type"] == "PerformanceCustomDataset":
164
+ custom_case_config = {
165
+ "name": parameters["custom_case_name"],
166
+ "description": parameters["custom_case_description"],
167
+ "load_timeout": parameters["custom_case_load_timeout"],
168
+ "optimize_timeout": parameters["custom_case_optimize_timeout"],
169
+ "dataset_config": {
170
+ "name": parameters["custom_dataset_name"],
171
+ "dir": parameters["custom_dataset_dir"],
172
+ "size": parameters["custom_dataset_size"],
173
+ "dim": parameters["custom_dataset_dim"],
174
+ "metric_type": parameters["custom_dataset_metric_type"],
175
+ "file_count": parameters["custom_dataset_file_count"],
176
+ "use_shuffled": parameters["custom_dataset_use_shuffled"],
177
+ "with_gt": parameters["custom_dataset_with_gt"],
178
+ }
179
+ }
180
+ return custom_case_config
181
+
182
+
150
183
  log = logging.getLogger(__name__)
151
184
 
152
185
 
@@ -205,6 +238,7 @@ class CommonTypedDict(TypedDict):
205
238
  click.option(
206
239
  "--case-type",
207
240
  type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
241
+ is_eager=True,
208
242
  default="Performance1536D50K",
209
243
  help="Case type",
210
244
  ),
@@ -258,6 +292,108 @@ class CommonTypedDict(TypedDict):
258
292
  callback=lambda *args: list(map(int, click_arg_split(*args))),
259
293
  ),
260
294
  ]
295
+ custom_case_name: Annotated[
296
+ str,
297
+ click.option(
298
+ "--custom-case-name",
299
+ help="Custom dataset case name",
300
+ callback=check_custom_case_parameters,
301
+ )
302
+ ]
303
+ custom_case_description: Annotated[
304
+ str,
305
+ click.option(
306
+ "--custom-case-description",
307
+ help="Custom dataset case description",
308
+ default="This is a customized dataset.",
309
+ show_default=True,
310
+ )
311
+ ]
312
+ custom_case_load_timeout: Annotated[
313
+ int,
314
+ click.option(
315
+ "--custom-case-load-timeout",
316
+ help="Custom dataset case load timeout",
317
+ default=36000,
318
+ show_default=True,
319
+ )
320
+ ]
321
+ custom_case_optimize_timeout: Annotated[
322
+ int,
323
+ click.option(
324
+ "--custom-case-optimize-timeout",
325
+ help="Custom dataset case optimize timeout",
326
+ default=36000,
327
+ show_default=True,
328
+ )
329
+ ]
330
+ custom_dataset_name: Annotated[
331
+ str,
332
+ click.option(
333
+ "--custom-dataset-name",
334
+ help="Custom dataset name",
335
+ callback=check_custom_case_parameters,
336
+ ),
337
+ ]
338
+ custom_dataset_dir: Annotated[
339
+ str,
340
+ click.option(
341
+ "--custom-dataset-dir",
342
+ help="Custom dataset directory",
343
+ callback=check_custom_case_parameters,
344
+ ),
345
+ ]
346
+ custom_dataset_size: Annotated[
347
+ int,
348
+ click.option(
349
+ "--custom-dataset-size",
350
+ help="Custom dataset size",
351
+ callback=check_custom_case_parameters,
352
+ ),
353
+ ]
354
+ custom_dataset_dim: Annotated[
355
+ int,
356
+ click.option(
357
+ "--custom-dataset-dim",
358
+ help="Custom dataset dimension",
359
+ callback=check_custom_case_parameters,
360
+ ),
361
+ ]
362
+ custom_dataset_metric_type: Annotated[
363
+ str,
364
+ click.option(
365
+ "--custom-dataset-metric-type",
366
+ help="Custom dataset metric type",
367
+ default=MetricType.COSINE.name,
368
+ show_default=True,
369
+ ),
370
+ ]
371
+ custom_dataset_file_count: Annotated[
372
+ int,
373
+ click.option(
374
+ "--custom-dataset-file-count",
375
+ help="Custom dataset file count",
376
+ callback=check_custom_case_parameters,
377
+ ),
378
+ ]
379
+ custom_dataset_use_shuffled: Annotated[
380
+ bool,
381
+ click.option(
382
+ "--custom-dataset-use-shuffled/--skip-custom-dataset-use-shuffled",
383
+ help="Custom dataset use shuffled",
384
+ default=False,
385
+ show_default=True,
386
+ ),
387
+ ]
388
+ custom_dataset_with_gt: Annotated[
389
+ bool,
390
+ click.option(
391
+ "--custom-dataset-with-gt/--skip-custom-dataset-with-gt",
392
+ help="Custom dataset with ground truth",
393
+ default=True,
394
+ show_default=True,
395
+ ),
396
+ ]
261
397
 
262
398
 
263
399
  class HNSWBaseTypedDict(TypedDict):
@@ -343,6 +479,7 @@ def run(
343
479
  concurrency_duration=parameters["concurrency_duration"],
344
480
  num_concurrency=[int(s) for s in parameters["num_concurrency"]],
345
481
  ),
482
+ custom_case=parameters.get("custom_case", {}),
346
483
  ),
347
484
  stages=parse_task_stages(
348
485
  (
vectordb_bench/cli/vectordbbench.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from ..backend.clients.pgvector.cli import PgVectorHNSW
2
2
  from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
3
+ from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
4
+ from ..backend.clients.pgdiskann.cli import PgDiskAnn
3
5
  from ..backend.clients.redis.cli import Redis
4
6
  from ..backend.clients.memorydb.cli import MemoryDB
5
7
  from ..backend.clients.test.cli import Test
@@ -8,7 +10,6 @@ from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
8
10
  from ..backend.clients.milvus.cli import MilvusAutoIndex
9
11
  from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
10
12
 
11
-
12
13
  from .cli import cli
13
14
 
14
15
  cli.add_command(PgVectorHNSW)
@@ -21,6 +22,8 @@ cli.add_command(Test)
21
22
  cli.add_command(ZillizAutoIndex)
22
23
  cli.add_command(MilvusAutoIndex)
23
24
  cli.add_command(AWSOpenSearch)
25
+ cli.add_command(PgVectorScaleDiskAnn)
26
+ cli.add_command(PgDiskAnn)
24
27
 
25
28
 
26
29
  if __name__ == "__main__":
vectordb_bench/frontend/components/check_results/charts.py CHANGED
@@ -1,5 +1,7 @@
1
1
  from vectordb_bench.backend.cases import Case
2
- from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
2
+ from vectordb_bench.frontend.components.check_results.expanderStyle import (
3
+ initMainExpanderStyle,
4
+ )
3
5
  from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
4
6
  from vectordb_bench.frontend.config.styles import *
5
7
  from vectordb_bench.models import ResultLabel
@@ -11,7 +13,7 @@ def drawCharts(st, allData, failedTasks, caseNames: list[str]):
11
13
  for caseName in caseNames:
12
14
  chartContainer = st.expander(caseName, True)
13
15
  data = [data for data in allData if data["case_name"] == caseName]
14
- drawChart(data, chartContainer)
16
+ drawChart(data, chartContainer, key_prefix=caseName)
15
17
 
16
18
  errorDBs = failedTasks[caseName]
17
19
  showFailedDBs(chartContainer, errorDBs)
@@ -35,7 +37,7 @@ def showFailedText(st, text, dbs):
35
37
  )
36
38
 
37
39
 
38
- def drawChart(data, st):
40
+ def drawChart(data, st, key_prefix: str):
39
41
  metricsSet = set()
40
42
  for d in data:
41
43
  metricsSet = metricsSet.union(d["metricsSet"])
@@ -43,7 +45,8 @@ def drawChart(data, st):
43
45
 
44
46
  for i, metric in enumerate(showMetrics):
45
47
  container = st.container()
46
- drawMetricChart(data, metric, container)
48
+ key = f"{key_prefix}-{metric}"
49
+ drawMetricChart(data, metric, container, key=key)
47
50
 
48
51
 
49
52
  def getLabelToShapeMap(data):
@@ -75,7 +78,7 @@ def getLabelToShapeMap(data):
75
78
  return labelToShapeMap
76
79
 
77
80
 
78
- def drawMetricChart(data, metric, st):
81
+ def drawMetricChart(data, metric, st, key: str):
79
82
  dataWithMetric = [d for d in data if d.get(metric, 0) > 1e-7]
80
83
  # dataWithMetric = data
81
84
  if len(dataWithMetric) == 0:
@@ -161,4 +164,4 @@ def drawMetricChart(data, metric, st):
161
164
  ),
162
165
  )
163
166
 
164
- chart.plotly_chart(fig, use_container_width=True)
167
+ chart.plotly_chart(fig, use_container_width=True, key=key)
vectordb_bench/frontend/components/concurrent/charts.py CHANGED
@@ -22,7 +22,7 @@ def drawChartsByCase(allData, showCaseNames: list[str], st):
22
22
  for caseData in caseDataList
23
23
  for i in range(len(caseData["conc_num_list"]))
24
24
  ]
25
- drawChart(data, chartContainer)
25
+ drawChart(data, chartContainer, key=f"{caseName}-qps-p99")
26
26
 
27
27
 
28
28
  def getRange(metric, data, padding_multipliers):
@@ -36,7 +36,7 @@ def getRange(metric, data, padding_multipliers):
36
36
  return rangeV
37
37
 
38
38
 
39
- def drawChart(data, st):
39
+ def drawChart(data, st, key: str):
40
40
  if len(data) == 0:
41
41
  return
42
42
 
@@ -73,7 +73,4 @@ def drawChart(data, st):
73
73
  fig.update_yaxes(range=yrange, title_text="QPS")
74
74
  fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")
75
75
 
76
- st.plotly_chart(
77
- fig,
78
- use_container_width=True,
79
- )
76
+ st.plotly_chart(fig, use_container_width=True, key=key)
vectordb_bench/frontend/components/run_test/caseSelector.py CHANGED
@@ -110,6 +110,12 @@ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, active
110
110
  value=config.inputConfig["value"],
111
111
  help=config.inputHelp,
112
112
  )
113
+ elif config.inputType == InputType.Bool:
114
+ caseConfig[config.label] = column.checkbox(
115
+ config.displayLabel if config.displayLabel else config.label.value,
116
+ value=config.inputConfig["value"],
117
+ help=config.inputHelp,
118
+ )
113
119
  k += 1
114
120
  if k == 0:
115
121
  columns[1].write("Auto")
vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED
@@ -3,7 +3,7 @@ import typing
3
3
  from pydantic import BaseModel
4
4
  from vectordb_bench.backend.cases import CaseLabel, CaseType
5
5
  from vectordb_bench.backend.clients import DB
6
- from vectordb_bench.backend.clients.api import IndexType
6
+ from vectordb_bench.backend.clients.api import IndexType, MetricType
7
7
  from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
8
8
 
9
9
  from vectordb_bench.models import CaseConfig, CaseConfigParamType
@@ -149,6 +149,7 @@ class InputType(IntEnum):
149
149
  Number = 20002
150
150
  Option = 20003
151
151
  Float = 20004
152
+ Bool = 20005
152
153
 
153
154
 
154
155
  class CaseConfigInput(BaseModel):
@@ -180,6 +181,16 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
180
181
  },
181
182
  )
182
183
 
184
+ CaseConfigParamInput_IndexType_PgDiskANN = CaseConfigInput(
185
+ label=CaseConfigParamType.IndexType,
186
+ inputHelp="Select Index Type",
187
+ inputType=InputType.Option,
188
+ inputConfig={
189
+ "options": [
190
+ IndexType.DISKANN.value,
191
+ ],
192
+ },
193
+ )
183
194
 
184
195
  CaseConfigParamInput_IndexType_PgVectorScale = CaseConfigInput(
185
196
  label=CaseConfigParamType.IndexType,
@@ -205,6 +216,42 @@ CaseConfigParamInput_storage_layout = CaseConfigInput(
205
216
  },
206
217
  )
207
218
 
219
+ CaseConfigParamInput_max_neighbors = CaseConfigInput(
220
+ label=CaseConfigParamType.max_neighbors,
221
+ inputType=InputType.Number,
222
+ inputConfig={
223
+ "min": 10,
224
+ "max": 300,
225
+ "value": 32,
226
+ },
227
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
228
+ == IndexType.DISKANN.value,
229
+ )
230
+
231
+ CaseConfigParamInput_l_value_ib = CaseConfigInput(
232
+ label=CaseConfigParamType.l_value_ib,
233
+ inputType=InputType.Number,
234
+ inputConfig={
235
+ "min": 10,
236
+ "max": 300,
237
+ "value": 50,
238
+ },
239
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
240
+ == IndexType.DISKANN.value,
241
+ )
242
+
243
+ CaseConfigParamInput_l_value_is = CaseConfigInput(
244
+ label=CaseConfigParamType.l_value_is,
245
+ inputType=InputType.Number,
246
+ inputConfig={
247
+ "min": 10,
248
+ "max": 300,
249
+ "value": 40,
250
+ },
251
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
252
+ == IndexType.DISKANN.value,
253
+ )
254
+
208
255
  CaseConfigParamInput_num_neighbors = CaseConfigInput(
209
256
  label=CaseConfigParamType.num_neighbors,
210
257
  inputType=InputType.Number,
@@ -360,6 +407,37 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
360
407
  },
361
408
  )
362
409
 
410
+ CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
411
+ label=CaseConfigParamType.EFConstruction,
412
+ inputType=InputType.Number,
413
+ inputConfig={
414
+ "min": 100,
415
+ "max": 1024,
416
+ "value": 256,
417
+ },
418
+ )
419
+
420
+ CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
421
+ label=CaseConfigParamType.M,
422
+ inputType=InputType.Number,
423
+ inputConfig={
424
+ "min": 4,
425
+ "max": 64,
426
+ "value": 16,
427
+ },
428
+ )
429
+
430
+ CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
431
+ label=CaseConfigParamType.ef_search,
432
+ inputType=InputType.Number,
433
+ inputConfig={
434
+ "min": 100,
435
+ "max": 1024,
436
+ "value": 256,
437
+ },
438
+ )
439
+
440
+
363
441
  CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
364
442
  label=CaseConfigParamType.maintenance_work_mem,
365
443
  inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
@@ -738,6 +816,19 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
738
816
  ],
739
817
  )
740
818
 
819
+ CaseConfigParamInput_QuantizationType_PgVector = CaseConfigInput(
820
+ label=CaseConfigParamType.quantizationType,
821
+ inputType=InputType.Option,
822
+ inputConfig={
823
+ "options": ["none", "bit", "halfvec"],
824
+ },
825
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
826
+ in [
827
+ IndexType.HNSW.value,
828
+ IndexType.IVFFlat.value,
829
+ ],
830
+ )
831
+
741
832
  CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
742
833
  label=CaseConfigParamType.quantizationRatio,
743
834
  inputType=InputType.Option,
@@ -775,6 +866,46 @@ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
775
866
  },
776
867
  )
777
868
 
869
+ CaseConfigParamInput_reranking_PgVector = CaseConfigInput(
870
+ label=CaseConfigParamType.reranking,
871
+ inputType=InputType.Bool,
872
+ displayLabel="Enable Reranking",
873
+ inputHelp="Enable if you want to use reranking while performing \
874
+ similarity search in binary quantization",
875
+ inputConfig={
876
+ "value": False,
877
+ },
878
+ isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
879
+ == "bit"
880
+ )
881
+
882
+ CaseConfigParamInput_quantized_fetch_limit_PgVector = CaseConfigInput(
883
+ label=CaseConfigParamType.quantizedFetchLimit,
884
+ displayLabel="Quantized vector fetch limit",
885
+ inputHelp="Limit top-k vectors using the quantized vector comparison --bound by ef_search",
886
+ inputType=InputType.Number,
887
+ inputConfig={
888
+ "min": 20,
889
+ "max": 1000,
890
+ "value": 200,
891
+ },
892
+ isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
893
+ == "bit" and config.get(CaseConfigParamType.reranking, False)
894
+ )
895
+
896
+
897
+ CaseConfigParamInput_reranking_metric_PgVector = CaseConfigInput(
898
+ label=CaseConfigParamType.rerankingMetric,
899
+ inputType=InputType.Option,
900
+ inputConfig={
901
+ "options": [
902
+ metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD"]
903
+ ],
904
+ },
905
+ isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
906
+ == "bit" and config.get(CaseConfigParamType.reranking, False)
907
+ )
908
+
778
909
  MilvusLoadConfig = [
779
910
  CaseConfigParamInput_IndexType,
780
911
  CaseConfigParamInput_M,
@@ -826,11 +957,19 @@ ESPerformanceConfig = [
826
957
  CaseConfigParamInput_NumCandidates_ES,
827
958
  ]
828
959
 
960
+ AWSOpensearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch]
961
+ AWSOpenSearchPerformanceConfig = [
962
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
963
+ CaseConfigParamInput_M_AWSOpensearch,
964
+ CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
965
+ ]
966
+
829
967
  PgVectorLoadingConfig = [
830
968
  CaseConfigParamInput_IndexType_PgVector,
831
969
  CaseConfigParamInput_Lists_PgVector,
832
970
  CaseConfigParamInput_m,
833
971
  CaseConfigParamInput_EFConstruction_PgVector,
972
+ CaseConfigParamInput_QuantizationType_PgVector,
834
973
  CaseConfigParamInput_maintenance_work_mem_PgVector,
835
974
  CaseConfigParamInput_max_parallel_workers_PgVector,
836
975
  ]
@@ -841,8 +980,12 @@ PgVectorPerformanceConfig = [
841
980
  CaseConfigParamInput_EFSearch_PgVector,
842
981
  CaseConfigParamInput_Lists_PgVector,
843
982
  CaseConfigParamInput_Probes_PgVector,
983
+ CaseConfigParamInput_QuantizationType_PgVector,
844
984
  CaseConfigParamInput_maintenance_work_mem_PgVector,
845
985
  CaseConfigParamInput_max_parallel_workers_PgVector,
986
+ CaseConfigParamInput_reranking_PgVector,
987
+ CaseConfigParamInput_reranking_metric_PgVector,
988
+ CaseConfigParamInput_quantized_fetch_limit_PgVector,
846
989
  ]
847
990
 
848
991
  PgVectoRSLoadingConfig = [
@@ -889,6 +1032,19 @@ PgVectorScalePerformanceConfig = [
889
1032
  CaseConfigParamInput_query_search_list_size,
890
1033
  ]
891
1034
 
1035
+ PgDiskANNLoadConfig = [
1036
+ CaseConfigParamInput_IndexType_PgDiskANN,
1037
+ CaseConfigParamInput_max_neighbors,
1038
+ CaseConfigParamInput_l_value_ib,
1039
+ ]
1040
+
1041
+ PgDiskANNPerformanceConfig = [
1042
+ CaseConfigParamInput_IndexType_PgDiskANN,
1043
+ CaseConfigParamInput_max_neighbors,
1044
+ CaseConfigParamInput_l_value_ib,
1045
+ CaseConfigParamInput_l_value_is,
1046
+ ]
1047
+
892
1048
  CASE_CONFIG_MAP = {
893
1049
  DB.Milvus: {
894
1050
  CaseLabel.Load: MilvusLoadConfig,
@@ -905,6 +1061,10 @@ CASE_CONFIG_MAP = {
905
1061
  CaseLabel.Load: ESLoadingConfig,
906
1062
  CaseLabel.Performance: ESPerformanceConfig,
907
1063
  },
1064
+ DB.AWSOpenSearch: {
1065
+ CaseLabel.Load: AWSOpensearchLoadingConfig,
1066
+ CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
1067
+ },
908
1068
  DB.PgVector: {
909
1069
  CaseLabel.Load: PgVectorLoadingConfig,
910
1070
  CaseLabel.Performance: PgVectorPerformanceConfig,
@@ -917,4 +1077,8 @@ CASE_CONFIG_MAP = {
917
1077
  CaseLabel.Load: PgVectorScaleLoadingConfig,
918
1078
  CaseLabel.Performance: PgVectorScalePerformanceConfig,
919
1079
  },
1080
+ DB.PgDiskANN: {
1081
+ CaseLabel.Load: PgDiskANNLoadConfig,
1082
+ CaseLabel.Performance: PgDiskANNPerformanceConfig,
1083
+ },
920
1084
  }
vectordb_bench/frontend/pages/quries_per_dollar.py CHANGED
@@ -1,10 +1,17 @@
1
1
  import streamlit as st
2
2
  from vectordb_bench.frontend.components.check_results.footer import footer
3
- from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
3
+ from vectordb_bench.frontend.components.check_results.expanderStyle import (
4
+ initMainExpanderStyle,
5
+ )
4
6
  from vectordb_bench.frontend.components.check_results.priceTable import priceTable
5
- from vectordb_bench.frontend.components.check_results.stPageConfig import initResultsPageConfig
7
+ from vectordb_bench.frontend.components.check_results.stPageConfig import (
8
+ initResultsPageConfig,
9
+ )
6
10
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
7
- from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToRunTest
11
+ from vectordb_bench.frontend.components.check_results.nav import (
12
+ NavToResults,
13
+ NavToRunTest,
14
+ )
8
15
  from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
9
16
  from vectordb_bench.frontend.components.check_results.filters import getshownData
10
17
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -16,7 +23,7 @@ from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
16
23
  def main():
17
24
  # set page config
18
25
  initResultsPageConfig(st)
19
-
26
+
20
27
  # header
21
28
  drawHeaderIcon(st)
22
29
 
@@ -57,7 +64,8 @@ def main():
57
64
  dataWithMetric.append(d)
58
65
  if len(dataWithMetric) > 0:
59
66
  chartContainer = st.expander(caseName, True)
60
- drawMetricChart(data, metric, chartContainer)
67
+ key = f"{caseName}-{metric}"
68
+ drawMetricChart(data, metric, chartContainer, key=key)
61
69
 
62
70
  # footer
63
71
  footer(st.container())
vectordb_bench/frontend/vdb_benchmark.py CHANGED
@@ -1,8 +1,13 @@
1
1
  import streamlit as st
2
2
  from vectordb_bench.frontend.components.check_results.footer import footer
3
- from vectordb_bench.frontend.components.check_results.stPageConfig import initResultsPageConfig
3
+ from vectordb_bench.frontend.components.check_results.stPageConfig import (
4
+ initResultsPageConfig,
5
+ )
4
6
  from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
5
- from vectordb_bench.frontend.components.check_results.nav import NavToQuriesPerDollar, NavToRunTest
7
+ from vectordb_bench.frontend.components.check_results.nav import (
8
+ NavToQuriesPerDollar,
9
+ NavToRunTest,
10
+ )
6
11
  from vectordb_bench.frontend.components.check_results.charts import drawCharts
7
12
  from vectordb_bench.frontend.components.check_results.filters import getshownData
8
13
  from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -20,7 +25,10 @@ def main():
20
25
  allResults = benchMarkRunner.get_results()
21
26
 
22
27
  st.title("Vector Database Benchmark")
23
- st.caption("Note that all testing was completed in July 2023, except for the times already noted.")
28
+ st.caption(
29
+ "Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
30
+ )
31
+ st.caption("All tested milvus are in _standalone_ mode.")
24
32
 
25
33
  # results selector and filter
26
34
  resultSelectorContainer = st.sidebar.container()
vectordb_bench/models.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  import pathlib
3
- from datetime import date
3
+ from datetime import date, datetime
4
4
  from enum import Enum, StrEnum, auto
5
5
  from typing import List, Self
6
6
 
@@ -47,6 +47,9 @@ class CaseConfigParamType(Enum):
47
47
  probes = "probes"
48
48
  quantizationType = "quantization_type"
49
49
  quantizationRatio = "quantization_ratio"
50
+ reranking = "reranking"
51
+ rerankingMetric = "reranking_metric"
52
+ quantizedFetchLimit = "quantized_fetch_limit"
50
53
  m = "m"
51
54
  nbits = "nbits"
52
55
  intermediate_graph_degree = "intermediate_graph_degree"
@@ -64,6 +67,9 @@ class CaseConfigParamType(Enum):
64
67
  max_parallel_workers = "max_parallel_workers"
65
68
  storage_layout = "storage_layout"
66
69
  num_neighbors = "num_neighbors"
70
+ max_neighbors = "max_neighbors"
71
+ l_value_ib = "l_value_ib"
72
+ l_value_is = "l_value_is"
67
73
  search_list_size = "search_list_size"
68
74
  max_alpha = "max_alpha"
69
75
  num_dimensions = "num_dimensions"
@@ -163,16 +169,20 @@ class TestResult(BaseModel):
163
169
  results: list[CaseResult]
164
170
 
165
171
  file_fmt: str = "result_{}_{}_{}.json" # result_20230718_statndard_milvus.json
172
+ timestamp: float = 0.0
166
173
 
167
174
  def flush(self):
168
175
  db2case = self.get_db_results()
169
-
176
+ timestamp = datetime.combine(date.today(), datetime.min.time()).timestamp()
170
177
  result_root = config.RESULTS_LOCAL_DIR
171
178
  for db, result in db2case.items():
172
179
  self.write_db_file(
173
180
  result_dir=result_root.joinpath(db.value),
174
181
  partial=TestResult(
175
- run_id=self.run_id, task_label=self.task_label, results=result
182
+ run_id=self.run_id,
183
+ task_label=self.task_label,
184
+ results=result,
185
+ timestamp=timestamp,
176
186
  ),
177
187
  db=db.value.lower(),
178
188
  )