vectordb-bench 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. vectordb_bench/backend/cases.py +45 -1
  2. vectordb_bench/backend/clients/__init__.py +32 -0
  3. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +34 -42
  4. vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -7
  5. vectordb_bench/backend/clients/milvus/cli.py +216 -0
  6. vectordb_bench/backend/clients/oss_opensearch/cli.py +155 -0
  7. vectordb_bench/backend/clients/oss_opensearch/config.py +157 -0
  8. vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +582 -0
  9. vectordb_bench/backend/clients/oss_opensearch/run.py +166 -0
  10. vectordb_bench/backend/clients/s3_vectors/config.py +41 -0
  11. vectordb_bench/backend/clients/s3_vectors/s3_vectors.py +171 -0
  12. vectordb_bench/backend/clients/tidb/cli.py +0 -4
  13. vectordb_bench/backend/clients/tidb/config.py +22 -2
  14. vectordb_bench/backend/dataset.py +70 -0
  15. vectordb_bench/backend/filter.py +17 -0
  16. vectordb_bench/backend/runner/mp_runner.py +4 -0
  17. vectordb_bench/backend/runner/read_write_runner.py +10 -9
  18. vectordb_bench/backend/runner/serial_runner.py +23 -7
  19. vectordb_bench/backend/task_runner.py +5 -4
  20. vectordb_bench/cli/vectordbbench.py +2 -0
  21. vectordb_bench/fig/custom_case_run_test.png +0 -0
  22. vectordb_bench/fig/custom_dataset.png +0 -0
  23. vectordb_bench/fig/homepage/bar-chart.png +0 -0
  24. vectordb_bench/fig/homepage/concurrent.png +0 -0
  25. vectordb_bench/fig/homepage/custom.png +0 -0
  26. vectordb_bench/fig/homepage/label_filter.png +0 -0
  27. vectordb_bench/fig/homepage/qp$.png +0 -0
  28. vectordb_bench/fig/homepage/run_test.png +0 -0
  29. vectordb_bench/fig/homepage/streaming.png +0 -0
  30. vectordb_bench/fig/homepage/table.png +0 -0
  31. vectordb_bench/fig/run_test_select_case.png +0 -0
  32. vectordb_bench/fig/run_test_select_db.png +0 -0
  33. vectordb_bench/fig/run_test_submit.png +0 -0
  34. vectordb_bench/frontend/components/check_results/filters.py +1 -4
  35. vectordb_bench/frontend/components/check_results/nav.py +2 -1
  36. vectordb_bench/frontend/components/concurrent/charts.py +5 -0
  37. vectordb_bench/frontend/components/int_filter/charts.py +60 -0
  38. vectordb_bench/frontend/components/streaming/data.py +7 -0
  39. vectordb_bench/frontend/components/welcome/welcomePrams.py +42 -4
  40. vectordb_bench/frontend/config/dbCaseConfigs.py +60 -13
  41. vectordb_bench/frontend/config/styles.py +3 -0
  42. vectordb_bench/frontend/pages/concurrent.py +1 -1
  43. vectordb_bench/frontend/pages/custom.py +1 -1
  44. vectordb_bench/frontend/pages/int_filter.py +56 -0
  45. vectordb_bench/frontend/pages/streaming.py +16 -3
  46. vectordb_bench/metric.py +7 -0
  47. vectordb_bench/models.py +36 -4
  48. vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json +2509 -0
  49. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/METADATA +1 -3
  50. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/RECORD +54 -32
  51. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/WHEEL +0 -0
  52. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/entry_points.txt +0 -0
  53. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/licenses/LICENSE +0 -0
  54. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/top_level.txt +0 -0
@@ -241,7 +241,7 @@ class SerialSearchRunner:
241
241
 
242
242
  return results
243
243
 
244
- def search(self, args: tuple[list, list[list[int]]]) -> tuple[float, float, float]:
244
+ def search(self, args: tuple[list, list[list[int]]]) -> tuple[float, float, float, float]:
245
245
  log.info(f"{mp.current_process().name:14} start search the entire test_data to get recall and latency")
246
246
  with self.db.init():
247
247
  self.db.prepare_filter(self.filters)
@@ -281,6 +281,7 @@ class SerialSearchRunner:
281
281
  avg_ndcg = round(np.mean(ndcgs), 4)
282
282
  cost = round(np.sum(latencies), 4)
283
283
  p99 = round(np.percentile(latencies, 99), 4)
284
+ p95 = round(np.percentile(latencies, 95), 4)
284
285
  log.info(
285
286
  f"{mp.current_process().name:14} search entire test_data: "
286
287
  f"cost={cost}s, "
@@ -288,20 +289,35 @@ class SerialSearchRunner:
288
289
  f"avg_recall={avg_recall}, "
289
290
  f"avg_ndcg={avg_ndcg}, "
290
291
  f"avg_latency={avg_latency}, "
291
- f"p99={p99}"
292
+ f"p99={p99}, "
293
+ f"p95={p95}"
292
294
  )
293
- return (avg_recall, avg_ndcg, p99)
295
+ return (avg_recall, avg_ndcg, p99, p95)
294
296
 
295
- def _run_in_subprocess(self) -> tuple[float, float]:
297
+ def _run_in_subprocess(self) -> tuple[float, float, float, float]:
296
298
  with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
297
299
  future = executor.submit(self.search, (self.test_data, self.ground_truth))
298
300
  return future.result()
299
301
 
300
302
  @utils.time_it
301
- def run(self) -> tuple[float, float, float]:
303
+ def run(self) -> tuple[float, float, float, float]:
304
+ log.info(f"{mp.current_process().name:14} start serial search")
305
+ if self.test_data is None:
306
+ msg = "empty test_data"
307
+ raise RuntimeError(msg)
308
+
309
+ return self._run_in_subprocess()
310
+
311
+ @utils.time_it
312
+ def run_with_cost(self) -> tuple[tuple[float, float, float, float], float]:
302
313
  """
314
+ Search all test data in serial.
303
315
  Returns:
304
- tuple[tuple[float, float, float], float]: (avg_recall, avg_ndcg, p99_latency), cost
305
-
316
+ tuple[tuple[float, float, float, float], float]: (avg_recall, avg_ndcg, p99_latency, p95_latency), cost
306
317
  """
318
+ log.info(f"{mp.current_process().name:14} start serial search")
319
+ if self.test_data is None:
320
+ msg = "empty test_data"
321
+ raise RuntimeError(msg)
322
+
307
323
  return self._run_in_subprocess()
@@ -186,11 +186,12 @@ class CaseRunner(BaseModel):
186
186
  m.conc_num_list,
187
187
  m.conc_qps_list,
188
188
  m.conc_latency_p99_list,
189
+ m.conc_latency_p95_list,
189
190
  m.conc_latency_avg_list,
190
191
  ) = search_results
191
192
  if TaskStage.SEARCH_SERIAL in self.config.stages:
192
193
  search_results = self._serial_search()
193
- m.recall, m.ndcg, m.serial_latency_p99 = search_results
194
+ m.recall, m.ndcg, m.serial_latency_p99, m.serial_latency_p95 = search_results
194
195
 
195
196
  except Exception as e:
196
197
  log.warning(f"Failed to run performance case, reason = {e}")
@@ -230,12 +231,12 @@ class CaseRunner(BaseModel):
230
231
  finally:
231
232
  runner = None
232
233
 
233
- def _serial_search(self) -> tuple[float, float, float]:
234
+ def _serial_search(self) -> tuple[float, float, float, float]:
234
235
  """Performance serial tests, search the entire test data once,
235
- calculate the recall, serial_latency_p99
236
+ calculate the recall, serial_latency_p99, serial_latency_p95
236
237
 
237
238
  Returns:
238
- tuple[float, float, float]: recall, ndcg, serial_latency_p99
239
+ tuple[float, float, float, float]: recall, ndcg, serial_latency_p99, serial_latency_p95
239
240
  """
240
241
  try:
241
242
  results, _ = self.serial_search_runner.run()
@@ -6,6 +6,7 @@ from ..backend.clients.mariadb.cli import MariaDBHNSW
6
6
  from ..backend.clients.memorydb.cli import MemoryDB
7
7
  from ..backend.clients.milvus.cli import MilvusAutoIndex
8
8
  from ..backend.clients.oceanbase.cli import OceanBaseHNSW, OceanBaseIVF
9
+ from ..backend.clients.oss_opensearch.cli import OSSOpenSearch
9
10
  from ..backend.clients.pgdiskann.cli import PgDiskAnn
10
11
  from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
11
12
  from ..backend.clients.pgvector.cli import PgVectorHNSW
@@ -31,6 +32,7 @@ cli.add_command(Test)
31
32
  cli.add_command(ZillizAutoIndex)
32
33
  cli.add_command(MilvusAutoIndex)
33
34
  cli.add_command(AWSOpenSearch)
35
+ cli.add_command(OSSOpenSearch)
34
36
  cli.add_command(PgVectorScaleDiskAnn)
35
37
  cli.add_command(PgDiskAnn)
36
38
  cli.add_command(AlloyDBScaNN)
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -89,7 +89,7 @@ def getShowDbsAndCases(st, result: list[CaseResult], filter_type: FilterOp) -> t
89
89
  col=1,
90
90
  )
91
91
 
92
- if filter_type == FilterOp.StrEqual:
92
+ if filter_type == FilterOp.StrEqual or filter_type == FilterOp.NumGE:
93
93
  container = st.container()
94
94
  datasetWithSizeTypes = [dataset_with_size_type for dataset_with_size_type in DatasetWithSizeType]
95
95
  showDatasetWithSizeTypes = filterView(
@@ -102,9 +102,6 @@ def getShowDbsAndCases(st, result: list[CaseResult], filter_type: FilterOp) -> t
102
102
  datasets = [dataset_with_size_type.get_manager() for dataset_with_size_type in showDatasetWithSizeTypes]
103
103
  showCaseNames = list(set([case.name for case in allCases if case.dataset in datasets]))
104
104
 
105
- if filter_type == FilterOp.NumGE:
106
- raise NotImplementedError
107
-
108
105
  return showDBNames, showCaseNames
109
106
 
110
107
 
@@ -19,7 +19,7 @@ def NavToQuriesPerDollar(st):
19
19
  def NavToResults(st, key="nav-to-results"):
20
20
  navClick = st.button("< &nbsp;&nbsp;Back to Results", key=key)
21
21
  if navClick:
22
- switch_page("vdb benchmark")
22
+ switch_page("results")
23
23
 
24
24
 
25
25
  def NavToPages(st):
@@ -29,6 +29,7 @@ def NavToPages(st):
29
29
  {"name": "Quries Per Dollar", "link": "quries_per_dollar"},
30
30
  {"name": "Concurrent", "link": "concurrent"},
31
31
  {"name": "Label Filter", "link": "label_filter"},
32
+ {"name": "Int Filter", "link": "int_filter"},
32
33
  {"name": "Streaming", "link": "streaming"},
33
34
  {"name": "Tables", "link": "tables"},
34
35
  {"name": "Custom Dataset", "link": "custom"},
@@ -20,6 +20,11 @@ def drawChartsByCase(allData, showCaseNames: list[str], st, latency_type: str):
20
20
  if 0 <= i < len(caseData["conc_latency_p99_list"])
21
21
  else 0
22
22
  ),
23
+ "latency_p95": (
24
+ caseData["conc_latency_p95_list"][i] * 1000
25
+ if "conc_latency_p95_list" in caseData and 0 <= i < len(caseData["conc_latency_p95_list"])
26
+ else 0
27
+ ),
23
28
  "latency_avg": (
24
29
  caseData["conc_latency_avg_list"][i] * 1000
25
30
  if 0 <= i < len(caseData["conc_latency_avg_list"])
@@ -0,0 +1,60 @@
1
+ import plotly.express as px
2
+ from vectordb_bench.metric import metric_unit_map
3
+
4
+
5
+ def drawCharts(st, allData, **kwargs):
6
+ dataset_names = list(set([data["dataset_name"] for data in allData]))
7
+ dataset_names.sort()
8
+ for dataset_name in dataset_names:
9
+ container = st.container()
10
+ container.subheader(dataset_name)
11
+ data = [d for d in allData if d["dataset_name"] == dataset_name]
12
+ drawChartByMetric(container, data, **kwargs)
13
+
14
+
15
+ def drawChartByMetric(st, data, metrics=("qps", "recall"), **kwargs):
16
+ columns = st.columns(len(metrics))
17
+ for i, metric in enumerate(metrics):
18
+ container = columns[i]
19
+ container.markdown(f"#### {metric}")
20
+ drawChart(container, data, metric)
21
+
22
+
23
+ def getRange(metric, data, padding_multipliers):
24
+ minV = min([d.get(metric, 0) for d in data])
25
+ maxV = max([d.get(metric, 0) for d in data])
26
+ padding = maxV - minV
27
+ rangeV = [
28
+ minV - padding * padding_multipliers[0],
29
+ maxV + padding * padding_multipliers[1],
30
+ ]
31
+ return rangeV
32
+
33
+
34
+ def drawChart(st, data: list[object], metric):
35
+ unit = metric_unit_map.get(metric, "")
36
+ x = "filter_rate"
37
+ xrange = getRange(x, data, [0.05, 0.1])
38
+
39
+ y = metric
40
+ yrange = getRange(y, data, [0.2, 0.1])
41
+
42
+ data.sort(key=lambda a: a[x])
43
+
44
+ fig = px.line(
45
+ data,
46
+ x=x,
47
+ y=y,
48
+ color="db_name",
49
+ line_group="db_name",
50
+ text=metric,
51
+ markers=True,
52
+ )
53
+ fig.update_xaxes(range=xrange)
54
+ fig.update_yaxes(range=yrange)
55
+ fig.update_traces(textposition="bottom right", texttemplate="%{y:,.4~r}" + unit)
56
+ fig.update_layout(
57
+ margin=dict(l=0, r=0, t=40, b=0, pad=8),
58
+ legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""),
59
+ )
60
+ st.plotly_chart(fig, use_container_width=True)
@@ -13,6 +13,7 @@ class DisplayedMetric(StrEnum):
13
13
  adjusted_recall = "adjusted_recall"
14
14
  adjusted_ndcg = "adjusted_ndcg"
15
15
  latency_p99 = "latency_p99"
16
+ latency_p95 = "latency_p95"
16
17
  # st_ideal_insert_duration = "st_ideal_insert_duration"
17
18
  # st_search_time_list = "st_search_time_list"
18
19
  insert_duration = "insert_duration"
@@ -31,6 +32,7 @@ class StreamingData:
31
32
  adjusted_recall: float
32
33
  adjusted_ndcg: float
33
34
  latency_p99: float
35
+ latency_p95: float
34
36
  ideal_insert_duration: int
35
37
  insert_duration: float
36
38
  optimize_duration: float
@@ -53,6 +55,11 @@ def get_streaming_data(data) -> list[StreamingData]:
53
55
  adjusted_recall=round(d["st_recall_list"][i] / min(search_stage, 100) * 100, 4),
54
56
  adjusted_ndcg=round(d["st_ndcg_list"][i] / min(search_stage, 100) * 100, 4),
55
57
  latency_p99=round(d["st_serial_latency_p99_list"][i] * 1000, 2),
58
+ latency_p95=(
59
+ round(d["st_serial_latency_p95_list"][i] * 1000, 2)
60
+ if "st_serial_latency_p95_list" in d and i < len(d["st_serial_latency_p95_list"])
61
+ else 0.0
62
+ ),
56
63
  ideal_insert_duration=d["st_ideal_insert_duration"],
57
64
  insert_duration=d["insert_duration"],
58
65
  optimize_duration=d["optimize_duration"],
@@ -2,6 +2,8 @@ import base64
2
2
  from PIL import Image
3
3
  from io import BytesIO
4
4
  import os
5
+ from pathlib import Path
6
+ from importlib import resources
5
7
 
6
8
  from vectordb_bench.frontend.components.welcome.pagestyle import pagestyle
7
9
 
@@ -11,12 +13,38 @@ def get_image_as_base64(image_path):
11
13
  if image_path.startswith("http"):
12
14
  return image_path
13
15
 
16
+ # Try to load from package resources first (for pip installed package)
17
+ if image_path.startswith("fig/homepage/"):
18
+ try:
19
+ # Convert fig/homepage/xxx.png to vectordb_bench.fig.homepage
20
+ package_parts = ["vectordb_bench"] + image_path.split("/")[:-1]
21
+ package_name = ".".join(package_parts)
22
+ file_name = os.path.basename(image_path)
23
+
24
+ # Get the resource content using importlib.resources
25
+ files = resources.files(package_name)
26
+ img_data = (files / file_name).read_bytes()
27
+
28
+ img = Image.open(BytesIO(img_data))
29
+ buffered = BytesIO()
30
+ img.save(buffered, format="PNG")
31
+ return f"data:image/png;base64,{base64.b64encode(buffered.getvalue()).decode()}"
32
+ except Exception:
33
+ # If package resource fails, try the original path
34
+ pass
35
+
36
+ # Fallback to file system path (for development)
14
37
  path = os.path.expanduser(image_path)
38
+ if not os.path.isabs(path):
39
+ # Try relative to the vectordb_bench package directory
40
+ package_dir = Path(__file__).parent.parent.parent
41
+ path = package_dir / path
42
+
15
43
  img = Image.open(path)
16
44
  buffered = BytesIO()
17
45
  img.save(buffered, format="PNG")
18
46
  return f"data:image/png;base64,{base64.b64encode(buffered.getvalue()).decode()}"
19
- except Exception as e:
47
+ except Exception:
20
48
  return None
21
49
 
22
50
 
@@ -66,12 +94,22 @@ def welcomePrams(st):
66
94
  "title": "Label Filter Performance",
67
95
  "description": (
68
96
  "<span style='font-size: 17px;'>"
69
- "To view the perfomance of datasets under different filter ratios "
97
+ "To view the perfomance of datasets under different label filter ratios "
70
98
  "</span>"
71
99
  ),
72
100
  "image": "fig/homepage/label_filter.png",
73
101
  "link": "label_filter",
74
102
  },
103
+ {
104
+ "title": "Int Filter Performance",
105
+ "description": (
106
+ "<span style='font-size: 17px;'>"
107
+ "To view the perfomance of datasets under different int filter ratios "
108
+ "</span>"
109
+ ),
110
+ "image": "fig/homepage/label_filter.png",
111
+ "link": "int_filter",
112
+ },
75
113
  {
76
114
  "title": "Streaming Performance",
77
115
  "description": (
@@ -110,7 +148,7 @@ def welcomePrams(st):
110
148
  for option in options:
111
149
  option["image"] = get_image_as_base64(option["image"])
112
150
 
113
- for i, option in enumerate(options[:6]):
151
+ for option in options[:7]:
114
152
  html_content += f"""
115
153
  <a href="/{option['link']}" target="_self" style="text-decoration: none;">
116
154
  <div class="section-card">
@@ -129,7 +167,7 @@ def welcomePrams(st):
129
167
  <div class="last-row">
130
168
  """
131
169
 
132
- for option in options[6:8]:
170
+ for option in options[7:9]:
133
171
  html_content += f"""
134
172
  <a href="/{option['link']}" target="_self" style="text-decoration: none;">
135
173
  <div class="section-card">
@@ -219,6 +219,17 @@ def generate_label_filter_cases(dataset_with_size_type: DatasetWithSizeType) ->
219
219
  ]
220
220
 
221
221
 
222
+ def generate_int_filter_cases(dataset_with_size_type: DatasetWithSizeType) -> list[CaseConfig]:
223
+ filter_rates = dataset_with_size_type.get_manager().data.scalar_int_rates
224
+ return [
225
+ CaseConfig(
226
+ case_id=CaseType.NewIntFilterPerformanceCase,
227
+ custom_case=dict(dataset_with_size_type=dataset_with_size_type, filter_rate=filter_rate),
228
+ )
229
+ for filter_rate in filter_rates
230
+ ]
231
+
232
+
222
233
  UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
223
234
  UICaseItemCluster(
224
235
  label="Search Performance Test",
@@ -249,6 +260,29 @@ UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
249
260
  UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K99P)),
250
261
  ],
251
262
  ),
263
+ UICaseItemCluster(
264
+ label="New-Int-Filter Search Performance Test",
265
+ uiCaseItems=[
266
+ UICaseItem(
267
+ label=f"Int-Filter Search Performance Test - {dataset_with_size_type.value}",
268
+ description=(
269
+ f"[Batch Cases]These cases test the search performance of a vector database "
270
+ f"with dataset {dataset_with_size_type.value}"
271
+ f"under filtering rates of {dataset_with_size_type.get_manager().data.scalar_int_rates}, at varying parallel levels."
272
+ f"Results will show index building time, recall, and maximum QPS."
273
+ ),
274
+ cases=generate_int_filter_cases(dataset_with_size_type),
275
+ )
276
+ for dataset_with_size_type in [
277
+ DatasetWithSizeType.CohereMedium,
278
+ DatasetWithSizeType.CohereLarge,
279
+ DatasetWithSizeType.OpenAIMedium,
280
+ DatasetWithSizeType.OpenAILarge,
281
+ DatasetWithSizeType.BioasqMedium,
282
+ DatasetWithSizeType.BioasqLarge,
283
+ ]
284
+ ],
285
+ ),
252
286
  UICaseItemCluster(
253
287
  label="Label-Filter Search Performance Test",
254
288
  uiCaseItems=[
@@ -632,6 +666,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
632
666
 
633
667
  CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
634
668
  label=CaseConfigParamType.EFConstruction,
669
+ displayLabel="EF Construction",
635
670
  inputType=InputType.Number,
636
671
  inputConfig={
637
672
  "min": 100,
@@ -642,6 +677,7 @@ CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
642
677
 
643
678
  CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
644
679
  label=CaseConfigParamType.M,
680
+ displayLabel="M",
645
681
  inputType=InputType.Number,
646
682
  inputConfig={
647
683
  "min": 4,
@@ -652,6 +688,7 @@ CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
652
688
 
653
689
  CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
654
690
  label=CaseConfigParamType.ef_search,
691
+ displayLabel="EF Search",
655
692
  inputType=InputType.Number,
656
693
  inputConfig={
657
694
  "min": 1,
@@ -1177,7 +1214,7 @@ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
1177
1214
  inputType=InputType.Number,
1178
1215
  inputConfig={
1179
1216
  "min": 1,
1180
- "max": 3,
1217
+ "max": 10,
1181
1218
  "value": 1,
1182
1219
  },
1183
1220
  )
@@ -1587,6 +1624,14 @@ CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch = CaseConfigInput(
1587
1624
  },
1588
1625
  )
1589
1626
 
1627
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch = CaseConfigInput(
1628
+ label=CaseConfigParamType.refresh_interval,
1629
+ displayLabel="Refresh Interval",
1630
+ inputHelp="How often to make new data searchable. (e.g., 30s, 1m).",
1631
+ inputType=InputType.Text,
1632
+ inputConfig={"value": "60s", "placeholder": "e.g. 30s, 1m"},
1633
+ )
1634
+
1590
1635
  MilvusLoadConfig = [
1591
1636
  CaseConfigParamInput_IndexType,
1592
1637
  CaseConfigParamInput_M,
@@ -1951,28 +1996,30 @@ LanceDBLoadConfig = [
1951
1996
  LanceDBPerformanceConfig = LanceDBLoadConfig
1952
1997
 
1953
1998
  AWSOpensearchLoadingConfig = [
1954
- CaseConfigParamInput_EFConstruction_AWSOpensearch,
1955
- CaseConfigParamInput_M_AWSOpensearch,
1999
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
1956
2000
  CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1957
2001
  CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1958
- CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1959
- CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
2002
+ CaseConfigParamInput_M_AWSOpensearch,
2003
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1960
2004
  CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1961
2005
  CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2006
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1962
2007
  CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
2008
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1963
2009
  ]
1964
2010
 
1965
2011
  AWSOpenSearchPerformanceConfig = [
1966
- CaseConfigParamInput_EFConstruction_AWSOpensearch,
1967
- CaseConfigParamInput_M_AWSOpensearch,
2012
+ CaseConfigParamInput_REFRESH_INTERVAL_AWSOpensearch,
1968
2013
  CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
1969
2014
  CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1970
2015
  CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1971
- CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1972
- CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
2016
+ CaseConfigParamInput_M_AWSOpensearch,
2017
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1973
2018
  CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1974
2019
  CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
2020
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1975
2021
  CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
2022
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1976
2023
  ]
1977
2024
 
1978
2025
  # Map DB to config
@@ -1997,6 +2044,10 @@ CASE_CONFIG_MAP = {
1997
2044
  CaseLabel.Load: AWSOpensearchLoadingConfig,
1998
2045
  CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
1999
2046
  },
2047
+ DB.OSSOpenSearch: {
2048
+ CaseLabel.Load: AWSOpensearchLoadingConfig,
2049
+ CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
2050
+ },
2000
2051
  DB.PgVector: {
2001
2052
  CaseLabel.Load: PgVectorLoadingConfig,
2002
2053
  CaseLabel.Performance: PgVectorPerformanceConfig,
@@ -2041,10 +2092,6 @@ CASE_CONFIG_MAP = {
2041
2092
  CaseLabel.Load: LanceDBLoadConfig,
2042
2093
  CaseLabel.Performance: LanceDBPerformanceConfig,
2043
2094
  },
2044
- DB.AWSOpenSearch: {
2045
- CaseLabel.Load: AWSOpensearchLoadingConfig,
2046
- CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
2047
- },
2048
2095
  }
2049
2096
 
2050
2097
 
@@ -59,12 +59,14 @@ DB_TO_ICON = {
59
59
  DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
60
60
  DB.AliyunOpenSearch: "",
61
61
  DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
62
+ DB.OSSOpenSearch: "https://images.seeklogo.com/logo-png/50/1/opensearch-icon-logo-png_seeklogo-500356.png",
62
63
  DB.MongoDB: "",
63
64
  DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
64
65
  DB.Clickhouse: "",
65
66
  DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
66
67
  DB.LanceDB: "",
67
68
  DB.OceanBase: "",
69
+ DB.S3Vectors: "https://assets.zilliz.com/s3_vectors_daf370b4e5.png",
68
70
  }
69
71
 
70
72
  # RedisCloud color: #0D6EFD
@@ -79,6 +81,7 @@ COLOR_MAP = {
79
81
  DB.PgVector.value: "#4C779A",
80
82
  DB.Redis.value: "#0D6EFD",
81
83
  DB.AWSOpenSearch.value: "#0DCAF0",
84
+ DB.OSSOpenSearch.value: "#0DCAF0",
82
85
  DB.TiDB.value: "#0D6EFD",
83
86
  DB.Vespa.value: "#61d790",
84
87
  }
@@ -60,7 +60,7 @@ def main():
60
60
  getResults(resultesContainer, "vectordb_bench_concurrent")
61
61
 
62
62
  # main
63
- latency_type = st.radio("Latency Type", options=["latency_p99", "latency_avg"])
63
+ latency_type = st.radio("Latency Type", options=["latency_p99", "latency_p95", "latency_avg"])
64
64
  drawChartsByCase(shownData, showCaseNames, st.container(), latency_type=latency_type)
65
65
 
66
66
  # footer
@@ -78,7 +78,7 @@ def main():
78
78
  )
79
79
 
80
80
  st.button(
81
- "\+ New Dataset",
81
+ "+ New Dataset",
82
82
  key="add_custom_configs",
83
83
  type="primary",
84
84
  on_click=lambda: customCaseManager.addCase(),
@@ -0,0 +1,56 @@
1
+ import streamlit as st
2
+ from vectordb_bench.backend.filter import FilterOp
3
+ from vectordb_bench.frontend.components.check_results.footer import footer
4
+ from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
5
+ from vectordb_bench.frontend.components.check_results.nav import (
6
+ NavToQuriesPerDollar,
7
+ NavToRunTest,
8
+ NavToPages,
9
+ )
10
+ from vectordb_bench.frontend.components.int_filter.charts import drawCharts
11
+ from vectordb_bench.frontend.components.check_results.filters import getshownData
12
+ from vectordb_bench.frontend.config.styles import FAVICON
13
+ from vectordb_bench.interface import benchmark_runner
14
+
15
+
16
+ def main():
17
+ # set page config
18
+ st.set_page_config(
19
+ page_title="Int Filter",
20
+ page_icon=FAVICON,
21
+ layout="wide",
22
+ # initial_sidebar_state="collapsed",
23
+ )
24
+
25
+ # header
26
+ drawHeaderIcon(st)
27
+
28
+ # navigate
29
+ NavToPages(st)
30
+
31
+ allResults = benchmark_runner.get_results()
32
+
33
+ st.title("Vector Database Benchmark (Int Filter)")
34
+
35
+ # results selector and filter
36
+ resultSelectorContainer = st.sidebar.container()
37
+ shownData, failedTasks, showCaseNames = getshownData(
38
+ resultSelectorContainer, allResults, filter_type=FilterOp.NumGE
39
+ )
40
+
41
+ resultSelectorContainer.divider()
42
+
43
+ # nav
44
+ navContainer = st.sidebar.container()
45
+ NavToRunTest(navContainer)
46
+ NavToQuriesPerDollar(navContainer)
47
+
48
+ # charts
49
+ drawCharts(st, shownData)
50
+
51
+ # footer
52
+ footer(st.container())
53
+
54
+
55
+ if __name__ == "__main__":
56
+ main()