vectordb-bench 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. vectordb_bench/__init__.py +1 -0
  2. vectordb_bench/backend/cases.py +45 -1
  3. vectordb_bench/backend/clients/__init__.py +47 -0
  4. vectordb_bench/backend/clients/api.py +2 -0
  5. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +104 -40
  6. vectordb_bench/backend/clients/aws_opensearch/cli.py +52 -15
  7. vectordb_bench/backend/clients/aws_opensearch/config.py +27 -7
  8. vectordb_bench/backend/clients/hologres/cli.py +50 -0
  9. vectordb_bench/backend/clients/hologres/config.py +121 -0
  10. vectordb_bench/backend/clients/hologres/hologres.py +365 -0
  11. vectordb_bench/backend/clients/lancedb/lancedb.py +1 -0
  12. vectordb_bench/backend/clients/milvus/cli.py +29 -9
  13. vectordb_bench/backend/clients/milvus/config.py +2 -0
  14. vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  15. vectordb_bench/backend/clients/oceanbase/cli.py +1 -0
  16. vectordb_bench/backend/clients/oceanbase/config.py +3 -1
  17. vectordb_bench/backend/clients/oceanbase/oceanbase.py +20 -4
  18. vectordb_bench/backend/clients/oss_opensearch/cli.py +155 -0
  19. vectordb_bench/backend/clients/oss_opensearch/config.py +157 -0
  20. vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +582 -0
  21. vectordb_bench/backend/clients/oss_opensearch/run.py +166 -0
  22. vectordb_bench/backend/clients/pgdiskann/cli.py +45 -0
  23. vectordb_bench/backend/clients/pgdiskann/config.py +16 -0
  24. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +94 -26
  25. vectordb_bench/backend/clients/s3_vectors/config.py +41 -0
  26. vectordb_bench/backend/clients/s3_vectors/s3_vectors.py +171 -0
  27. vectordb_bench/backend/clients/tidb/cli.py +0 -4
  28. vectordb_bench/backend/clients/tidb/config.py +22 -2
  29. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -1
  30. vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -1
  31. vectordb_bench/backend/dataset.py +70 -0
  32. vectordb_bench/backend/filter.py +17 -0
  33. vectordb_bench/backend/runner/mp_runner.py +4 -0
  34. vectordb_bench/backend/runner/rate_runner.py +23 -11
  35. vectordb_bench/backend/runner/read_write_runner.py +10 -9
  36. vectordb_bench/backend/runner/serial_runner.py +23 -7
  37. vectordb_bench/backend/task_runner.py +5 -4
  38. vectordb_bench/cli/cli.py +36 -0
  39. vectordb_bench/cli/vectordbbench.py +4 -0
  40. vectordb_bench/fig/custom_case_run_test.png +0 -0
  41. vectordb_bench/fig/custom_dataset.png +0 -0
  42. vectordb_bench/fig/homepage/bar-chart.png +0 -0
  43. vectordb_bench/fig/homepage/concurrent.png +0 -0
  44. vectordb_bench/fig/homepage/custom.png +0 -0
  45. vectordb_bench/fig/homepage/label_filter.png +0 -0
  46. vectordb_bench/fig/homepage/qp$.png +0 -0
  47. vectordb_bench/fig/homepage/run_test.png +0 -0
  48. vectordb_bench/fig/homepage/streaming.png +0 -0
  49. vectordb_bench/fig/homepage/table.png +0 -0
  50. vectordb_bench/fig/run_test_select_case.png +0 -0
  51. vectordb_bench/fig/run_test_select_db.png +0 -0
  52. vectordb_bench/fig/run_test_submit.png +0 -0
  53. vectordb_bench/frontend/components/check_results/filters.py +1 -4
  54. vectordb_bench/frontend/components/check_results/nav.py +2 -1
  55. vectordb_bench/frontend/components/concurrent/charts.py +5 -0
  56. vectordb_bench/frontend/components/int_filter/charts.py +60 -0
  57. vectordb_bench/frontend/components/streaming/data.py +7 -0
  58. vectordb_bench/frontend/components/welcome/welcomePrams.py +42 -4
  59. vectordb_bench/frontend/config/dbCaseConfigs.py +142 -16
  60. vectordb_bench/frontend/config/styles.py +4 -0
  61. vectordb_bench/frontend/pages/concurrent.py +1 -1
  62. vectordb_bench/frontend/pages/custom.py +1 -1
  63. vectordb_bench/frontend/pages/int_filter.py +56 -0
  64. vectordb_bench/frontend/pages/streaming.py +16 -3
  65. vectordb_bench/interface.py +5 -1
  66. vectordb_bench/metric.py +7 -0
  67. vectordb_bench/models.py +39 -4
  68. vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json +2509 -0
  69. vectordb_bench/results/getLeaderboardDataV2.py +23 -2
  70. vectordb_bench/results/leaderboard_v2.json +200 -0
  71. vectordb_bench/results/leaderboard_v2_streaming.json +128 -0
  72. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/METADATA +40 -8
  73. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/RECORD +77 -51
  74. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/WHEEL +0 -0
  75. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/entry_points.txt +0 -0
  76. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/licenses/LICENSE +0 -0
  77. {vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,60 @@
1
+ import plotly.express as px
2
+ from vectordb_bench.metric import metric_unit_map
3
+
4
+
5
+ def drawCharts(st, allData, **kwargs):
6
+ dataset_names = list(set([data["dataset_name"] for data in allData]))
7
+ dataset_names.sort()
8
+ for dataset_name in dataset_names:
9
+ container = st.container()
10
+ container.subheader(dataset_name)
11
+ data = [d for d in allData if d["dataset_name"] == dataset_name]
12
+ drawChartByMetric(container, data, **kwargs)
13
+
14
+
15
+ def drawChartByMetric(st, data, metrics=("qps", "recall"), **kwargs):
16
+ columns = st.columns(len(metrics))
17
+ for i, metric in enumerate(metrics):
18
+ container = columns[i]
19
+ container.markdown(f"#### {metric}")
20
+ drawChart(container, data, metric)
21
+
22
+
23
+ def getRange(metric, data, padding_multipliers):
24
+ minV = min([d.get(metric, 0) for d in data])
25
+ maxV = max([d.get(metric, 0) for d in data])
26
+ padding = maxV - minV
27
+ rangeV = [
28
+ minV - padding * padding_multipliers[0],
29
+ maxV + padding * padding_multipliers[1],
30
+ ]
31
+ return rangeV
32
+
33
+
34
+ def drawChart(st, data: list[object], metric):
35
+ unit = metric_unit_map.get(metric, "")
36
+ x = "filter_rate"
37
+ xrange = getRange(x, data, [0.05, 0.1])
38
+
39
+ y = metric
40
+ yrange = getRange(y, data, [0.2, 0.1])
41
+
42
+ data.sort(key=lambda a: a[x])
43
+
44
+ fig = px.line(
45
+ data,
46
+ x=x,
47
+ y=y,
48
+ color="db_name",
49
+ line_group="db_name",
50
+ text=metric,
51
+ markers=True,
52
+ )
53
+ fig.update_xaxes(range=xrange)
54
+ fig.update_yaxes(range=yrange)
55
+ fig.update_traces(textposition="bottom right", texttemplate="%{y:,.4~r}" + unit)
56
+ fig.update_layout(
57
+ margin=dict(l=0, r=0, t=40, b=0, pad=8),
58
+ legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""),
59
+ )
60
+ st.plotly_chart(fig, use_container_width=True)
@@ -13,6 +13,7 @@ class DisplayedMetric(StrEnum):
13
13
  adjusted_recall = "adjusted_recall"
14
14
  adjusted_ndcg = "adjusted_ndcg"
15
15
  latency_p99 = "latency_p99"
16
+ latency_p95 = "latency_p95"
16
17
  # st_ideal_insert_duration = "st_ideal_insert_duration"
17
18
  # st_search_time_list = "st_search_time_list"
18
19
  insert_duration = "insert_duration"
@@ -31,6 +32,7 @@ class StreamingData:
31
32
  adjusted_recall: float
32
33
  adjusted_ndcg: float
33
34
  latency_p99: float
35
+ latency_p95: float
34
36
  ideal_insert_duration: int
35
37
  insert_duration: float
36
38
  optimize_duration: float
@@ -53,6 +55,11 @@ def get_streaming_data(data) -> list[StreamingData]:
53
55
  adjusted_recall=round(d["st_recall_list"][i] / min(search_stage, 100) * 100, 4),
54
56
  adjusted_ndcg=round(d["st_ndcg_list"][i] / min(search_stage, 100) * 100, 4),
55
57
  latency_p99=round(d["st_serial_latency_p99_list"][i] * 1000, 2),
58
+ latency_p95=(
59
+ round(d["st_serial_latency_p95_list"][i] * 1000, 2)
60
+ if "st_serial_latency_p95_list" in d and i < len(d["st_serial_latency_p95_list"])
61
+ else 0.0
62
+ ),
56
63
  ideal_insert_duration=d["st_ideal_insert_duration"],
57
64
  insert_duration=d["insert_duration"],
58
65
  optimize_duration=d["optimize_duration"],
@@ -2,6 +2,8 @@ import base64
2
2
  from PIL import Image
3
3
  from io import BytesIO
4
4
  import os
5
+ from pathlib import Path
6
+ from importlib import resources
5
7
 
6
8
  from vectordb_bench.frontend.components.welcome.pagestyle import pagestyle
7
9
 
@@ -11,12 +13,38 @@ def get_image_as_base64(image_path):
11
13
  if image_path.startswith("http"):
12
14
  return image_path
13
15
 
16
+ # Try to load from package resources first (for pip installed package)
17
+ if image_path.startswith("fig/homepage/"):
18
+ try:
19
+ # Convert fig/homepage/xxx.png to vectordb_bench.fig.homepage
20
+ package_parts = ["vectordb_bench"] + image_path.split("/")[:-1]
21
+ package_name = ".".join(package_parts)
22
+ file_name = os.path.basename(image_path)
23
+
24
+ # Get the resource content using importlib.resources
25
+ files = resources.files(package_name)
26
+ img_data = (files / file_name).read_bytes()
27
+
28
+ img = Image.open(BytesIO(img_data))
29
+ buffered = BytesIO()
30
+ img.save(buffered, format="PNG")
31
+ return f"data:image/png;base64,{base64.b64encode(buffered.getvalue()).decode()}"
32
+ except Exception:
33
+ # If package resource fails, try the original path
34
+ pass
35
+
36
+ # Fallback to file system path (for development)
14
37
  path = os.path.expanduser(image_path)
38
+ if not os.path.isabs(path):
39
+ # Try relative to the vectordb_bench package directory
40
+ package_dir = Path(__file__).parent.parent.parent
41
+ path = package_dir / path
42
+
15
43
  img = Image.open(path)
16
44
  buffered = BytesIO()
17
45
  img.save(buffered, format="PNG")
18
46
  return f"data:image/png;base64,{base64.b64encode(buffered.getvalue()).decode()}"
19
- except Exception as e:
47
+ except Exception:
20
48
  return None
21
49
 
22
50
 
@@ -66,12 +94,22 @@ def welcomePrams(st):
66
94
  "title": "Label Filter Performance",
67
95
  "description": (
68
96
  "<span style='font-size: 17px;'>"
69
- "To view the perfomance of datasets under different filter ratios "
97
+ "To view the perfomance of datasets under different label filter ratios "
70
98
  "</span>"
71
99
  ),
72
100
  "image": "fig/homepage/label_filter.png",
73
101
  "link": "label_filter",
74
102
  },
103
+ {
104
+ "title": "Int Filter Performance",
105
+ "description": (
106
+ "<span style='font-size: 17px;'>"
107
+ "To view the perfomance of datasets under different int filter ratios "
108
+ "</span>"
109
+ ),
110
+ "image": "fig/homepage/label_filter.png",
111
+ "link": "int_filter",
112
+ },
75
113
  {
76
114
  "title": "Streaming Performance",
77
115
  "description": (
@@ -110,7 +148,7 @@ def welcomePrams(st):
110
148
  for option in options:
111
149
  option["image"] = get_image_as_base64(option["image"])
112
150
 
113
- for i, option in enumerate(options[:6]):
151
+ for option in options[:7]:
114
152
  html_content += f"""
115
153
  <a href="/{option['link']}" target="_self" style="text-decoration: none;">
116
154
  <div class="section-card">
@@ -129,7 +167,7 @@ def welcomePrams(st):
129
167
  <div class="last-row">
130
168
  """
131
169
 
132
- for option in options[6:8]:
170
+ for option in options[7:9]:
133
171
  html_content += f"""
134
172
  <a href="/{option['link']}" target="_self" style="text-decoration: none;">
135
173
  <div class="section-card">
@@ -219,6 +219,17 @@ def generate_label_filter_cases(dataset_with_size_type: DatasetWithSizeType) ->
219
219
  ]
220
220
 
221
221
 
222
+ def generate_int_filter_cases(dataset_with_size_type: DatasetWithSizeType) -> list[CaseConfig]:
223
+ filter_rates = dataset_with_size_type.get_manager().data.scalar_int_rates
224
+ return [
225
+ CaseConfig(
226
+ case_id=CaseType.NewIntFilterPerformanceCase,
227
+ custom_case=dict(dataset_with_size_type=dataset_with_size_type, filter_rate=filter_rate),
228
+ )
229
+ for filter_rate in filter_rates
230
+ ]
231
+
232
+
222
233
  UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
223
234
  UICaseItemCluster(
224
235
  label="Search Performance Test",
@@ -249,6 +260,29 @@ UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
249
260
  UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K99P)),
250
261
  ],
251
262
  ),
263
+ UICaseItemCluster(
264
+ label="New-Int-Filter Search Performance Test",
265
+ uiCaseItems=[
266
+ UICaseItem(
267
+ label=f"Int-Filter Search Performance Test - {dataset_with_size_type.value}",
268
+ description=(
269
+ f"[Batch Cases]These cases test the search performance of a vector database "
270
+ f"with dataset {dataset_with_size_type.value}"
271
+ f"under filtering rates of {dataset_with_size_type.get_manager().data.scalar_int_rates}, at varying parallel levels."
272
+ f"Results will show index building time, recall, and maximum QPS."
273
+ ),
274
+ cases=generate_int_filter_cases(dataset_with_size_type),
275
+ )
276
+ for dataset_with_size_type in [
277
+ DatasetWithSizeType.CohereMedium,
278
+ DatasetWithSizeType.CohereLarge,
279
+ DatasetWithSizeType.OpenAIMedium,
280
+ DatasetWithSizeType.OpenAILarge,
281
+ DatasetWithSizeType.BioasqMedium,
282
+ DatasetWithSizeType.BioasqLarge,
283
+ ]
284
+ ],
285
+ ),
252
286
  UICaseItemCluster(
253
287
  label="Label-Filter Search Performance Test",
254
288
  uiCaseItems=[
@@ -389,8 +423,58 @@ CaseConfigParamInput_storage_layout = CaseConfigInput(
389
423
  },
390
424
  )
391
425
 
392
- CaseConfigParamInput_max_neighbors = CaseConfigInput(
426
+ CaseConfigParamInput_reranking_PgDiskANN = CaseConfigInput(
427
+ label=CaseConfigParamType.reranking,
428
+ inputType=InputType.Bool,
429
+ displayLabel="Enable Reranking",
430
+ inputHelp="Enable if you want to use reranking while performing \
431
+ similarity search with PQ",
432
+ inputConfig={
433
+ "value": False,
434
+ },
435
+ )
436
+
437
+ CaseConfigParamInput_quantized_fetch_limit_PgDiskANN = CaseConfigInput(
438
+ label=CaseConfigParamType.quantized_fetch_limit,
439
+ displayLabel="Quantized Fetch Limit",
440
+ inputHelp="Limit top-k vectors using the quantized vector comparison",
441
+ inputType=InputType.Number,
442
+ inputConfig={
443
+ "min": 20,
444
+ "max": 1000,
445
+ "value": 200,
446
+ },
447
+ isDisplayed=lambda config: config.get(CaseConfigParamType.reranking, False),
448
+ )
449
+
450
+ CaseConfigParamInput_pq_param_num_chunks_PgDiskANN = CaseConfigInput(
451
+ label=CaseConfigParamType.pq_param_num_chunks,
452
+ displayLabel="pq_param_num_chunks",
453
+ inputHelp="Number of chunks for product quantization (Defaults to 0). 0 means it is determined automatically, based on embedding dimensions.",
454
+ inputType=InputType.Number,
455
+ inputConfig={
456
+ "min": 0,
457
+ "max": 1028,
458
+ "value": 0,
459
+ },
460
+ isDisplayed=lambda config: config.get(CaseConfigParamType.reranking, False),
461
+ )
462
+
463
+
464
+ CaseConfigParamInput_reranking_metric_PgDiskANN = CaseConfigInput(
465
+ label=CaseConfigParamType.reranking_metric,
466
+ displayLabel="Reranking Metric",
467
+ inputType=InputType.Option,
468
+ inputConfig={
469
+ "options": [metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD", "DP"]],
470
+ },
471
+ isDisplayed=lambda config: config.get(CaseConfigParamType.reranking, False),
472
+ )
473
+
474
+
475
+ CaseConfigParamInput_max_neighbors_PgDiskANN = CaseConfigInput(
393
476
  label=CaseConfigParamType.max_neighbors,
477
+ displayLabel="max_neighbors",
394
478
  inputType=InputType.Number,
395
479
  inputConfig={
396
480
  "min": 10,
@@ -422,6 +506,29 @@ CaseConfigParamInput_l_value_is = CaseConfigInput(
422
506
  isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.DISKANN.value,
423
507
  )
424
508
 
509
+ CaseConfigParamInput_maintenance_work_mem_PgDiskANN = CaseConfigInput(
510
+ label=CaseConfigParamType.maintenance_work_mem,
511
+ inputHelp="Memory to use during index builds. Not to exceed the available free memory."
512
+ "Specify in gigabytes. e.g. 8GB",
513
+ inputType=InputType.Text,
514
+ inputConfig={
515
+ "value": "8GB",
516
+ },
517
+ )
518
+
519
+ CaseConfigParamInput_max_parallel_workers_PgDiskANN = CaseConfigInput(
520
+ label=CaseConfigParamType.max_parallel_workers,
521
+ displayLabel="Max parallel workers",
522
+ inputHelp="Recommended value: (cpu cores - 1). This will set the parameters: max_parallel_maintenance_workers,"
523
+ " max_parallel_workers & table(parallel_workers)",
524
+ inputType=InputType.Number,
525
+ inputConfig={
526
+ "min": 0,
527
+ "max": 1024,
528
+ "value": 16,
529
+ },
530
+ )
531
+
425
532
  CaseConfigParamInput_num_neighbors = CaseConfigInput(
426
533
  label=CaseConfigParamType.num_neighbors,
427
534
  inputType=InputType.Number,
@@ -632,6 +739,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
632
739
 
633
740
  CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
634
741
  label=CaseConfigParamType.EFConstruction,
742
+ displayLabel="EF Construction",
635
743
  inputType=InputType.Number,
636
744
  inputConfig={
637
745
  "min": 100,
@@ -642,6 +750,7 @@ CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
642
750
 
643
751
  CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
644
752
  label=CaseConfigParamType.M,
753
+ displayLabel="M",
645
754
  inputType=InputType.Number,
646
755
  inputConfig={
647
756
  "min": 4,
@@ -652,6 +761,7 @@ CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
652
761
 
653
762
  CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
654
763
  label=CaseConfigParamType.ef_search,
764
+ displayLabel="EF Search",
655
765
  inputType=InputType.Number,
656
766
  inputConfig={
657
767
  "min": 1,
@@ -1177,7 +1287,7 @@ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
1177
1287
  inputType=InputType.Number,
1178
1288
  inputConfig={
1179
1289
  "min": 1,
1180
- "max": 3,
1290
+ "max": 10,
1181
1291
  "value": 1,
1182
1292
  },
1183
1293
  )
@@ -1587,6 +1697,14 @@ CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch = CaseConfigInput(
1587
1697
  },
1588
1698
  )
1589
1699
 
1700
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch = CaseConfigInput(
1701
+ label=CaseConfigParamType.refresh_interval,
1702
+ displayLabel="Refresh Interval",
1703
+ inputHelp="How often to make new data searchable. (e.g., 30s, 1m).",
1704
+ inputType=InputType.Text,
1705
+ inputConfig={"value": "60s", "placeholder": "e.g. 30s, 1m"},
1706
+ )
1707
+
1590
1708
  MilvusLoadConfig = [
1591
1709
  CaseConfigParamInput_IndexType,
1592
1710
  CaseConfigParamInput_M,
@@ -1751,15 +1869,21 @@ PgVectorScalePerformanceConfig = [
1751
1869
 
1752
1870
  PgDiskANNLoadConfig = [
1753
1871
  CaseConfigParamInput_IndexType_PgDiskANN,
1754
- CaseConfigParamInput_max_neighbors,
1872
+ CaseConfigParamInput_max_neighbors_PgDiskANN,
1755
1873
  CaseConfigParamInput_l_value_ib,
1756
1874
  ]
1757
1875
 
1758
1876
  PgDiskANNPerformanceConfig = [
1759
1877
  CaseConfigParamInput_IndexType_PgDiskANN,
1760
- CaseConfigParamInput_max_neighbors,
1878
+ CaseConfigParamInput_reranking_PgDiskANN,
1879
+ CaseConfigParamInput_max_neighbors_PgDiskANN,
1761
1880
  CaseConfigParamInput_l_value_ib,
1762
1881
  CaseConfigParamInput_l_value_is,
1882
+ CaseConfigParamInput_maintenance_work_mem_PgDiskANN,
1883
+ CaseConfigParamInput_max_parallel_workers_PgDiskANN,
1884
+ CaseConfigParamInput_pq_param_num_chunks_PgDiskANN,
1885
+ CaseConfigParamInput_quantized_fetch_limit_PgDiskANN,
1886
+ CaseConfigParamInput_reranking_metric_PgDiskANN,
1763
1887
  ]
1764
1888
 
1765
1889
 
@@ -1951,28 +2075,30 @@ LanceDBLoadConfig = [
1951
2075
  LanceDBPerformanceConfig = LanceDBLoadConfig
1952
2076
 
1953
2077
  AWSOpensearchLoadingConfig = [
1954
- CaseConfigParamInput_EFConstruction_AWSOpensearch,
1955
- CaseConfigParamInput_M_AWSOpensearch,
2078
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
1956
2079
  CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1957
2080
  CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1958
- CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1959
- CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
2081
+ CaseConfigParamInput_M_AWSOpensearch,
2082
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1960
2083
  CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1961
2084
  CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2085
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1962
2086
  CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
2087
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1963
2088
  ]
1964
2089
 
1965
2090
  AWSOpenSearchPerformanceConfig = [
1966
- CaseConfigParamInput_EFConstruction_AWSOpensearch,
1967
- CaseConfigParamInput_M_AWSOpensearch,
2091
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
1968
2092
  CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
1969
2093
  CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1970
2094
  CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1971
- CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1972
- CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
2095
+ CaseConfigParamInput_M_AWSOpensearch,
2096
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1973
2097
  CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1974
2098
  CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2099
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1975
2100
  CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
2101
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1976
2102
  ]
1977
2103
 
1978
2104
  # Map DB to config
@@ -1997,6 +2123,10 @@ CASE_CONFIG_MAP = {
1997
2123
  CaseLabel.Load: AWSOpensearchLoadingConfig,
1998
2124
  CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
1999
2125
  },
2126
+ DB.OSSOpenSearch: {
2127
+ CaseLabel.Load: AWSOpensearchLoadingConfig,
2128
+ CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
2129
+ },
2000
2130
  DB.PgVector: {
2001
2131
  CaseLabel.Load: PgVectorLoadingConfig,
2002
2132
  CaseLabel.Performance: PgVectorPerformanceConfig,
@@ -2041,10 +2171,6 @@ CASE_CONFIG_MAP = {
2041
2171
  CaseLabel.Load: LanceDBLoadConfig,
2042
2172
  CaseLabel.Performance: LanceDBPerformanceConfig,
2043
2173
  },
2044
- DB.AWSOpenSearch: {
2045
- CaseLabel.Load: AWSOpensearchLoadingConfig,
2046
- CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
2047
- },
2048
2174
  }
2049
2175
 
2050
2176
 
@@ -59,12 +59,15 @@ DB_TO_ICON = {
59
59
  DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
60
60
  DB.AliyunOpenSearch: "",
61
61
  DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
62
+ DB.OSSOpenSearch: "https://images.seeklogo.com/logo-png/50/1/opensearch-icon-logo-png_seeklogo-500356.png",
62
63
  DB.MongoDB: "",
63
64
  DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
64
65
  DB.Clickhouse: "",
65
66
  DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
66
67
  DB.LanceDB: "",
67
68
  DB.OceanBase: "",
69
+ DB.S3Vectors: "https://assets.zilliz.com/s3_vectors_daf370b4e5.png",
70
+ DB.Hologres: "https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png",
68
71
  }
69
72
 
70
73
  # RedisCloud color: #0D6EFD
@@ -79,6 +82,7 @@ COLOR_MAP = {
79
82
  DB.PgVector.value: "#4C779A",
80
83
  DB.Redis.value: "#0D6EFD",
81
84
  DB.AWSOpenSearch.value: "#0DCAF0",
85
+ DB.OSSOpenSearch.value: "#0DCAF0",
82
86
  DB.TiDB.value: "#0D6EFD",
83
87
  DB.Vespa.value: "#61d790",
84
88
  }
@@ -60,7 +60,7 @@ def main():
60
60
  getResults(resultesContainer, "vectordb_bench_concurrent")
61
61
 
62
62
  # main
63
- latency_type = st.radio("Latency Type", options=["latency_p99", "latency_avg"])
63
+ latency_type = st.radio("Latency Type", options=["latency_p99", "latency_p95", "latency_avg"])
64
64
  drawChartsByCase(shownData, showCaseNames, st.container(), latency_type=latency_type)
65
65
 
66
66
  # footer
@@ -78,7 +78,7 @@ def main():
78
78
  )
79
79
 
80
80
  st.button(
81
- "\+ New Dataset",
81
+ "+ New Dataset",
82
82
  key="add_custom_configs",
83
83
  type="primary",
84
84
  on_click=lambda: customCaseManager.addCase(),
@@ -0,0 +1,56 @@
1
+ import streamlit as st
2
+ from vectordb_bench.backend.filter import FilterOp
3
+ from vectordb_bench.frontend.components.check_results.footer import footer
4
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
5
+ from vectordb_bench.frontend.components.check_results.nav import (
6
+ NavToQuriesPerDollar,
7
+ NavToRunTest,
8
+ NavToPages,
9
+ )
10
+ from vectordb_bench.frontend.components.int_filter.charts import drawCharts
11
+ from vectordb_bench.frontend.components.check_results.filters import getshownData
12
+ from vectordb_bench.frontend.config.styles import FAVICON
13
+ from vectordb_bench.interface import benchmark_runner
14
+
15
+
16
+ def main():
17
+ # set page config
18
+ st.set_page_config(
19
+ page_title="Int Filter",
20
+ page_icon=FAVICON,
21
+ layout="wide",
22
+ # initial_sidebar_state="collapsed",
23
+ )
24
+
25
+ # header
26
+ drawHeaderIcon(st)
27
+
28
+ # navigate
29
+ NavToPages(st)
30
+
31
+ allResults = benchmark_runner.get_results()
32
+
33
+ st.title("Vector Database Benchmark (Int Filter)")
34
+
35
+ # results selector and filter
36
+ resultSelectorContainer = st.sidebar.container()
37
+ shownData, failedTasks, showCaseNames = getshownData(
38
+ resultSelectorContainer, allResults, filter_type=FilterOp.NumGE
39
+ )
40
+
41
+ resultSelectorContainer.divider()
42
+
43
+ # nav
44
+ navContainer = st.sidebar.container()
45
+ NavToRunTest(navContainer)
46
+ NavToQuriesPerDollar(navContainer)
47
+
48
+ # charts
49
+ drawCharts(st, shownData)
50
+
51
+ # footer
52
+ footer(st.container())
53
+
54
+
55
+ if __name__ == "__main__":
56
+ main()
@@ -71,7 +71,6 @@ def main():
71
71
  getResults(resultesContainer, "vectordb_bench_streaming")
72
72
 
73
73
  # # main
74
- # latency_type = st.radio("Latency Type", options=["latency_p99", "latency_avg"])
75
74
  st.markdown("Tests search performance with a **stable** and **fixed** insertion rate.")
76
75
  control_panel = st.columns(3)
77
76
  compared_with_optimized = control_panel[0].toggle(
@@ -84,6 +83,15 @@ def main():
84
83
  value=False,
85
84
  help="Since vdbbench inserts may be faster than vetordb can process them, the time it actually reaches search_stage may have different delays.",
86
85
  )
86
+
87
+ # Latency type selection
88
+ latency_type = control_panel[2].radio(
89
+ "Latency Type",
90
+ options=["latency_p99", "latency_p95"],
91
+ index=0,
92
+ help="Choose between P99 (slowest 1%) or P95 (slowest 5%) latency metrics.",
93
+ )
94
+
87
95
  accuracy_metric = DisplayedMetric.recall
88
96
  show_ndcg = control_panel[1].toggle(
89
97
  "Show **NDCG** instead of Recall.",
@@ -103,6 +111,11 @@ def main():
103
111
  else:
104
112
  if need_adjust:
105
113
  accuracy_metric = DisplayedMetric.adjusted_recall
114
+
115
+ # Determine which latency metric to display
116
+ latency_metric = DisplayedMetric.latency_p99 if latency_type == "latency_p99" else DisplayedMetric.latency_p95
117
+ latency_desc = "serial lantency (p99)" if latency_type == "latency_p99" else "serial lantency (p95)"
118
+
106
119
  line_chart_displayed_y_metrics: list[tuple[DisplayedMetric, str]] = [
107
120
  (
108
121
  DisplayedMetric.qps,
@@ -110,8 +123,8 @@ def main():
110
123
  ),
111
124
  (accuracy_metric, "calculated in each search_stage."),
112
125
  (
113
- DisplayedMetric.latency_p99,
114
- "serial lantency (p99) of **serial search** tests in each search stage.",
126
+ latency_metric,
127
+ f"{latency_desc} of **serial search** tests in each search stage.",
115
128
  ),
116
129
  ]
117
130
  line_chart_displayed_x_metric = DisplayedMetric.search_stage
@@ -43,7 +43,11 @@ class BenchMarkRunner:
43
43
  self.running_task: TaskRunner | None = None
44
44
  self.latest_error: str | None = None
45
45
  self.drop_old: bool = True
46
- self.dataset_source: DatasetSource = DatasetSource.S3
46
+ # set default data source by ENV
47
+ if config.DATASET_SOURCE.upper() == "ALIYUNOSS":
48
+ self.dataset_source: DatasetSource = DatasetSource.AliyunOSS
49
+ else:
50
+ self.dataset_source: DatasetSource = DatasetSource.S3
47
51
 
48
52
  def set_drop_old(self, drop_old: bool):
49
53
  self.drop_old = drop_old