vectordb-bench 0.0.13__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +47 -6
  2. vectordb_bench/backend/clients/aws_opensearch/config.py +12 -6
  3. vectordb_bench/backend/clients/aws_opensearch/run.py +34 -3
  4. vectordb_bench/backend/clients/pgvector/cli.py +17 -2
  5. vectordb_bench/backend/clients/pgvector/config.py +20 -5
  6. vectordb_bench/backend/clients/pgvector/pgvector.py +95 -25
  7. vectordb_bench/backend/clients/pgvectorscale/cli.py +108 -0
  8. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +22 -4
  9. vectordb_bench/backend/clients/pinecone/config.py +0 -2
  10. vectordb_bench/backend/clients/pinecone/pinecone.py +34 -36
  11. vectordb_bench/backend/clients/redis/cli.py +8 -0
  12. vectordb_bench/backend/clients/redis/config.py +37 -6
  13. vectordb_bench/backend/runner/mp_runner.py +2 -1
  14. vectordb_bench/cli/cli.py +137 -0
  15. vectordb_bench/cli/vectordbbench.py +2 -1
  16. vectordb_bench/frontend/components/check_results/charts.py +9 -6
  17. vectordb_bench/frontend/components/concurrent/charts.py +3 -6
  18. vectordb_bench/frontend/config/dbCaseConfigs.py +57 -0
  19. vectordb_bench/frontend/pages/quries_per_dollar.py +13 -5
  20. vectordb_bench/frontend/vdb_benchmark.py +11 -3
  21. vectordb_bench/models.py +7 -3
  22. vectordb_bench/results/Milvus/result_20230727_standard_milvus.json +53 -1
  23. vectordb_bench/results/Milvus/result_20230808_standard_milvus.json +48 -0
  24. vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +29 -1
  25. vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +24 -0
  26. vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +98 -49
  27. vectordb_bench/results/getLeaderboardData.py +17 -7
  28. vectordb_bench/results/leaderboard.json +1 -1
  29. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/METADATA +60 -35
  30. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/RECORD +34 -33
  31. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/WHEEL +1 -1
  32. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/LICENSE +0 -0
  33. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/entry_points.txt +0 -0
  34. {vectordb_bench-0.0.13.dist-info → vectordb_bench-0.0.14.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/pinecone/pinecone.py CHANGED
@@ -3,7 +3,7 @@
 import logging
 from contextlib import contextmanager
 from typing import Type
-
+import pinecone
 from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
 from .config import PineconeConfig
 
@@ -11,7 +11,8 @@ from .config import PineconeConfig
 log = logging.getLogger(__name__)
 
 PINECONE_MAX_NUM_PER_BATCH = 1000
-PINECONE_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024 # 2MB
+PINECONE_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB
+
 
 class Pinecone(VectorDB):
     def __init__(
@@ -23,30 +24,25 @@ class Pinecone(VectorDB):
         **kwargs,
     ):
         """Initialize wrapper around the milvus vector database."""
-        self.index_name = db_config["index_name"]
-        self.api_key = db_config["api_key"]
-        self.environment = db_config["environment"]
-        self.batch_size = int(min(PINECONE_MAX_SIZE_PER_BATCH / (dim * 5), PINECONE_MAX_NUM_PER_BATCH))
-        # Pincone will make connections with server while import
-        # so place the import here.
-        import pinecone
-        pinecone.init(
-            api_key=self.api_key, environment=self.environment)
+        self.index_name = db_config.get("index_name", "")
+        self.api_key = db_config.get("api_key", "")
+        self.batch_size = int(
+            min(PINECONE_MAX_SIZE_PER_BATCH / (dim * 5), PINECONE_MAX_NUM_PER_BATCH)
+        )
+
+        pc = pinecone.Pinecone(api_key=self.api_key)
+        index = pc.Index(self.index_name)
+
         if drop_old:
-            list_indexes = pinecone.list_indexes()
-            if self.index_name in list_indexes:
-                index = pinecone.Index(self.index_name)
-                index_dim = index.describe_index_stats()["dimension"]
-                if (index_dim != dim):
-                    raise ValueError(
-                        f"Pinecone index {self.index_name} dimension mismatch, expected {index_dim} got {dim}")
-                log.info(
-                    f"Pinecone client delete old index: {self.index_name}")
-                index.delete(delete_all=True)
-                index.close()
-            else:
+            index_stats = index.describe_index_stats()
+            index_dim = index_stats["dimension"]
+            if index_dim != dim:
                 raise ValueError(
-                    f"Pinecone index {self.index_name} does not exist")
+                    f"Pinecone index {self.index_name} dimension mismatch, expected {index_dim} got {dim}"
+                )
+            for namespace in index_stats["namespaces"]:
+                log.info(f"Pinecone index delete namespace: {namespace}")
+                index.delete(delete_all=True, namespace=namespace)
 
         self._metadata_key = "meta"
 
@@ -59,13 +55,10 @@ class Pinecone(VectorDB):
         return EmptyDBCaseConfig
 
     @contextmanager
-    def init(self) -> None:
-        import pinecone
-        pinecone.init(
-            api_key=self.api_key, environment=self.environment)
-        self.index = pinecone.Index(self.index_name)
+    def init(self):
+        pc = pinecone.Pinecone(api_key=self.api_key)
+        self.index = pc.Index(self.index_name)
         yield
-        self.index.close()
 
     def ready_to_load(self):
         pass
@@ -83,11 +76,16 @@ class Pinecone(VectorDB):
         insert_count = 0
         try:
             for batch_start_offset in range(0, len(embeddings), self.batch_size):
-                batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
+                batch_end_offset = min(
+                    batch_start_offset + self.batch_size, len(embeddings)
+                )
                 insert_datas = []
                 for i in range(batch_start_offset, batch_end_offset):
-                    insert_data = (str(metadata[i]), embeddings[i], {
-                        self._metadata_key: metadata[i]})
+                    insert_data = (
+                        str(metadata[i]),
+                        embeddings[i],
+                        {self._metadata_key: metadata[i]},
+                    )
                     insert_datas.append(insert_data)
                 self.index.upsert(insert_datas)
                 insert_count += batch_end_offset - batch_start_offset
@@ -101,7 +99,7 @@ class Pinecone(VectorDB):
         k: int = 100,
         filters: dict | None = None,
         timeout: int | None = None,
-    ) -> list[tuple[int, float]]:
+    ) -> list[int]:
         if filters is None:
             pinecone_filters = {}
         else:
@@ -111,9 +109,9 @@ class Pinecone(VectorDB):
                 top_k=k,
                 vector=query,
                 filter=pinecone_filters,
-            )['matches']
+            )["matches"]
         except Exception as e:
             print(f"Error querying index: {e}")
             raise e
-        id_res = [int(one_res['id']) for one_res in res]
+        id_res = [int(one_res["id"]) for one_res in res]
        return id_res
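
Note: the rewrite above tracks the pinecone-client 3.x API, which replaces the module-level pinecone.init()/pinecone.Index() calls with a client object and drops the environment parameter entirely. A minimal sketch of the new access pattern, with placeholder credentials (the key and index name are illustrative, not from this package):

    import pinecone

    # Placeholder values for illustration only.
    pc = pinecone.Pinecone(api_key="YOUR_API_KEY")
    index = pc.Index("vdb-bench-index")

    # describe_index_stats() backs both the dimension check and the
    # per-namespace deletes in __init__ above.
    stats = index.describe_index_stats()
    print(stats["dimension"], list(stats["namespaces"]))

The batching bound is unchanged: at most min(2 MB / (dim * 5), 1000) vectors per upsert call.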
vectordb_bench/backend/clients/redis/cli.py CHANGED
@@ -3,6 +3,9 @@ from typing import Annotated, TypedDict, Unpack
 import click
 from pydantic import SecretStr
 
+from .config import RedisHNSWConfig
+
+
 from ....cli.cli import (
     CommonTypedDict,
     HNSWFlavor2,
@@ -69,6 +72,11 @@ def Redis(**parameters: Unpack[RedisHNSWTypedDict]):
             ssl=parameters["ssl"],
             ssl_ca_certs=parameters["ssl_ca_certs"],
            cmd=parameters["cmd"],
+        ),
+        db_case_config=RedisHNSWConfig(
+            M=parameters["m"],
+            efConstruction=parameters["ef_construction"],
+            ef=parameters["ef_runtime"],
         ),
         **parameters,
     )
vectordb_bench/backend/clients/redis/config.py CHANGED
@@ -1,14 +1,45 @@
-from pydantic import SecretStr
-from ..api import DBConfig
+from pydantic import SecretStr, BaseModel
+from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
 
 class RedisConfig(DBConfig):
-    password: SecretStr
+    password: SecretStr | None = None
     host: SecretStr
-    port: int = None
+    port: int | None = None
 
     def to_dict(self) -> dict:
         return {
             "host": self.host.get_secret_value(),
             "port": self.port,
-            "password": self.password.get_secret_value(),
-        }
+            "password": self.password.get_secret_value() if self.password is not None else None,
+        }
+
+
+
+class RedisIndexConfig(BaseModel):
+    """Base config for milvus"""
+
+    metric_type: MetricType | None = None
+
+    def parse_metric(self) -> str:
+        if not self.metric_type:
+            return ""
+        return self.metric_type.value
+
+
+class RedisHNSWConfig(RedisIndexConfig, DBCaseConfig):
+    M: int
+    efConstruction: int
+    ef: int | None = None
+    index: IndexType = IndexType.HNSW
+
+    def index_param(self) -> dict:
+        return {
+            "metric_type": self.parse_metric(),
+            "index_type": self.index.value,
+            "params": {"M": self.M, "efConstruction": self.efConstruction},
+        }
+
+    def search_param(self) -> dict:
+        return {
+            "metric_type": self.parse_metric(),
+            "params": {"ef": self.ef},
+        }
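
Note: RedisHNSWConfig follows the same index-param/search-param split used by the other clients (the copied "Base config for milvus" docstring gives away its origin). A quick sketch of how it behaves, assuming MetricType.L2 is among the metric values exposed by ..api:

    from vectordb_bench.backend.clients.api import MetricType
    from vectordb_bench.backend.clients.redis.config import RedisHNSWConfig

    cfg = RedisHNSWConfig(metric_type=MetricType.L2, M=16, efConstruction=200, ef=64)
    # index_param() feeds index creation; search_param() feeds queries.
    print(cfg.index_param())   # roughly: {'metric_type': 'L2', 'index_type': 'HNSW',
                               #           'params': {'M': 16, 'efConstruction': 200}}
    print(cfg.search_param())  # roughly: {'metric_type': 'L2', 'params': {'ef': 64}}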
vectordb_bench/backend/runner/mp_runner.py CHANGED
@@ -2,6 +2,7 @@ import time
 import traceback
 import concurrent
 import multiprocessing as mp
+import random
 import logging
 from typing import Iterable
 import numpy as np
@@ -46,7 +47,7 @@ class MultiProcessingSearchRunner:
                 cond.wait()
 
         with self.db.init():
-            num, idx = len(test_data), 0
+            num, idx = len(test_data), random.randint(0, len(test_data) - 1)
 
             start_time = time.perf_counter()
             count = 0
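
Note: this one-line change makes each search worker start at a random position in the shared test set, instead of every worker starting at index 0 and issuing identical queries in lockstep. A self-contained sketch of the resulting access pattern (the modular wrap-around is an assumption about the surrounding loop, which this hunk does not show):

    import random

    def query_order(num_queries: int, steps: int) -> list[int]:
        # Start at a random offset, as in the changed line, then walk
        # forward with wrap-around (assumed behavior of the runner loop).
        idx = random.randint(0, num_queries - 1)
        order = []
        for _ in range(steps):
            order.append(idx)
            idx = (idx + 1) % num_queries
        return order

    print(query_order(10, 5))  # e.g. [7, 8, 9, 0, 1]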
vectordb_bench/cli/cli.py CHANGED
@@ -17,6 +17,8 @@ from typing (
     Any,
 )
 import click
+
+from vectordb_bench.backend.clients.api import MetricType
 from .. import config
 from ..backend.clients import DB
 from ..interface import benchMarkRunner, global_result_future
@@ -147,6 +149,37 @@ def parse_task_stages(
     return stages
 
 
+def check_custom_case_parameters(ctx, param, value):
+    if ctx.params.get("case_type") == "PerformanceCustomDataset":
+        if value is None:
+            raise click.BadParameter("Custom case parameters\
+                \n--custom-case-name\n--custom-dataset-name\n--custom-dataset-dir\n--custom-dataset-size \
+                \n--custom-dataset-dim\n--custom-dataset-file-count\n are required")
+    return value
+
+
+def get_custom_case_config(parameters: dict) -> dict:
+    custom_case_config = {}
+    if parameters["case_type"] == "PerformanceCustomDataset":
+        custom_case_config = {
+            "name": parameters["custom_case_name"],
+            "description": parameters["custom_case_description"],
+            "load_timeout": parameters["custom_case_load_timeout"],
+            "optimize_timeout": parameters["custom_case_optimize_timeout"],
+            "dataset_config": {
+                "name": parameters["custom_dataset_name"],
+                "dir": parameters["custom_dataset_dir"],
+                "size": parameters["custom_dataset_size"],
+                "dim": parameters["custom_dataset_dim"],
+                "metric_type": parameters["custom_dataset_metric_type"],
+                "file_count": parameters["custom_dataset_file_count"],
+                "use_shuffled": parameters["custom_dataset_use_shuffled"],
+                "with_gt": parameters["custom_dataset_with_gt"],
+            }
+        }
+    return custom_case_config
+
+
 log = logging.getLogger(__name__)
 
 
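
Note: get_custom_case_config is a no-op for every case type except PerformanceCustomDataset; for that case it regroups the flat CLI parameters into the nested shape consumed downstream. A hypothetical input/output pair using the function above (all values invented for illustration; only the keys it reads are shown):

    parameters = {
        "case_type": "PerformanceCustomDataset",
        "custom_case_name": "my-case",
        "custom_case_description": "This is a customized dataset.",
        "custom_case_load_timeout": 36000,
        "custom_case_optimize_timeout": 36000,
        "custom_dataset_name": "my-dataset",
        "custom_dataset_dir": "/data/my-dataset",
        "custom_dataset_size": 100_000,
        "custom_dataset_dim": 768,
        "custom_dataset_metric_type": "COSINE",
        "custom_dataset_file_count": 1,
        "custom_dataset_use_shuffled": False,
        "custom_dataset_with_gt": True,
    }
    custom_case = get_custom_case_config(parameters)
    assert custom_case["name"] == "my-case"
    assert custom_case["dataset_config"]["dim"] == 768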
@@ -205,6 +238,7 @@ class CommonTypedDict(TypedDict):
         click.option(
             "--case-type",
             type=click.Choice([ct.name for ct in CaseType if ct.name != "Custom"]),
+            is_eager=True,
             default="Performance1536D50K",
             help="Case type",
         ),
@@ -258,6 +292,108 @@ class CommonTypedDict(TypedDict):
             callback=lambda *args: list(map(int, click_arg_split(*args))),
         ),
     ]
+    custom_case_name: Annotated[
+        str,
+        click.option(
+            "--custom-case-name",
+            help="Custom dataset case name",
+            callback=check_custom_case_parameters,
+        )
+    ]
+    custom_case_description: Annotated[
+        str,
+        click.option(
+            "--custom-case-description",
+            help="Custom dataset case description",
+            default="This is a customized dataset.",
+            show_default=True,
+        )
+    ]
+    custom_case_load_timeout: Annotated[
+        int,
+        click.option(
+            "--custom-case-load-timeout",
+            help="Custom dataset case load timeout",
+            default=36000,
+            show_default=True,
+        )
+    ]
+    custom_case_optimize_timeout: Annotated[
+        int,
+        click.option(
+            "--custom-case-optimize-timeout",
+            help="Custom dataset case optimize timeout",
+            default=36000,
+            show_default=True,
+        )
+    ]
+    custom_dataset_name: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-name",
+            help="Custom dataset name",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_dir: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-dir",
+            help="Custom dataset directory",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_size: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-size",
+            help="Custom dataset size",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_dim: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-dim",
+            help="Custom dataset dimension",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_metric_type: Annotated[
+        str,
+        click.option(
+            "--custom-dataset-metric-type",
+            help="Custom dataset metric type",
+            default=MetricType.COSINE.name,
+            show_default=True,
+        ),
+    ]
+    custom_dataset_file_count: Annotated[
+        int,
+        click.option(
+            "--custom-dataset-file-count",
+            help="Custom dataset file count",
+            callback=check_custom_case_parameters,
+        ),
+    ]
+    custom_dataset_use_shuffled: Annotated[
+        bool,
+        click.option(
+            "--custom-dataset-use-shuffled/--skip-custom-dataset-use-shuffled",
+            help="Custom dataset use shuffled",
+            default=False,
+            show_default=True,
+        ),
+    ]
+    custom_dataset_with_gt: Annotated[
+        bool,
+        click.option(
+            "--custom-dataset-with-gt/--skip-custom-dataset-with-gt",
+            help="Custom dataset with ground truth",
+            default=True,
+            show_default=True,
+        ),
+    ]
 
 
 class HNSWBaseTypedDict(TypedDict):
@@ -343,6 +479,7 @@ def run(
                 concurrency_duration=parameters["concurrency_duration"],
                 num_concurrency=[int(s) for s in parameters["num_concurrency"]],
             ),
+            custom_case=parameters.get("custom_case", {}),
         ),
         stages=parse_task_stages(
             (
vectordb_bench/cli/vectordbbench.py CHANGED
@@ -1,5 +1,6 @@
 from ..backend.clients.pgvector.cli import PgVectorHNSW
 from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
+from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
 from ..backend.clients.redis.cli import Redis
 from ..backend.clients.memorydb.cli import MemoryDB
 from ..backend.clients.test.cli import Test
@@ -8,7 +9,6 @@ from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
 from ..backend.clients.milvus.cli import MilvusAutoIndex
 from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
 
-
 from .cli import cli
 
 cli.add_command(PgVectorHNSW)
@@ -21,6 +21,7 @@ cli.add_command(Test)
 cli.add_command(ZillizAutoIndex)
 cli.add_command(MilvusAutoIndex)
 cli.add_command(AWSOpenSearch)
+cli.add_command(PgVectorScaleDiskAnn)
 
 
 if __name__ == "__main__":
vectordb_bench/frontend/components/check_results/charts.py CHANGED
@@ -1,5 +1,7 @@
 from vectordb_bench.backend.cases import Case
-from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
+from vectordb_bench.frontend.components.check_results.expanderStyle import (
+    initMainExpanderStyle,
+)
 from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
 from vectordb_bench.frontend.config.styles import *
 from vectordb_bench.models import ResultLabel
@@ -11,7 +13,7 @@ def drawCharts(st, allData, failedTasks, caseNames: list[str]):
     for caseName in caseNames:
         chartContainer = st.expander(caseName, True)
         data = [data for data in allData if data["case_name"] == caseName]
-        drawChart(data, chartContainer)
+        drawChart(data, chartContainer, key_prefix=caseName)
 
         errorDBs = failedTasks[caseName]
         showFailedDBs(chartContainer, errorDBs)
@@ -35,7 +37,7 @@ def showFailedText(st, text, dbs):
     )
 
 
-def drawChart(data, st):
+def drawChart(data, st, key_prefix: str):
     metricsSet = set()
     for d in data:
         metricsSet = metricsSet.union(d["metricsSet"])
@@ -43,7 +45,8 @@ def drawChart(data, st):
 
     for i, metric in enumerate(showMetrics):
         container = st.container()
-        drawMetricChart(data, metric, container)
+        key = f"{key_prefix}-{metric}"
+        drawMetricChart(data, metric, container, key=key)
 
 
 def getLabelToShapeMap(data):
@@ -75,7 +78,7 @@ def getLabelToShapeMap(data):
     return labelToShapeMap
 
 
-def drawMetricChart(data, metric, st):
+def drawMetricChart(data, metric, st, key: str):
     dataWithMetric = [d for d in data if d.get(metric, 0) > 1e-7]
     # dataWithMetric = data
     if len(dataWithMetric) == 0:
@@ -161,4 +164,4 @@ def drawMetricChart(data, metric, st):
         ),
     )
 
-    chart.plotly_chart(fig, use_container_width=True)
+    chart.plotly_chart(fig, use_container_width=True, key=key)
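
Note: the key argument threaded through drawCharts → drawChart → drawMetricChart gives every rendered chart a unique Streamlit element key (case name plus metric), which avoids duplicate-element-ID errors when the same drawing code runs for several cases on one page. A minimal standalone illustration (figure data invented):

    import plotly.express as px
    import streamlit as st

    # Rendering similar charts in a loop requires distinct keys.
    fig = px.bar(x=["qps", "recall"], y=[1200, 0.95])
    for case_name in ["case-a", "case-b"]:
        st.plotly_chart(fig, use_container_width=True, key=f"{case_name}-qps")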
vectordb_bench/frontend/components/concurrent/charts.py CHANGED
@@ -22,7 +22,7 @@ def drawChartsByCase(allData, showCaseNames: list[str], st):
             for caseData in caseDataList
             for i in range(len(caseData["conc_num_list"]))
         ]
-        drawChart(data, chartContainer)
+        drawChart(data, chartContainer, key=f"{caseName}-qps-p99")
 
 
 def getRange(metric, data, padding_multipliers):
@@ -36,7 +36,7 @@ def getRange(metric, data, padding_multipliers):
     return rangeV
 
 
-def drawChart(data, st):
+def drawChart(data, st, key: str):
     if len(data) == 0:
         return
 
@@ -73,7 +73,4 @@ def drawChart(data, st):
     fig.update_yaxes(range=yrange, title_text="QPS")
     fig.update_traces(textposition="bottom right", texttemplate="conc-%{text:,.4~r}")
 
-    st.plotly_chart(
-        fig,
-        use_container_width=True,
-    )
+    st.plotly_chart(fig, use_container_width=True, key=key)
vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED
@@ -360,6 +360,37 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
     },
 )
 
+CaseConfigParamInput_EFConstruction_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.EFConstruction,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 100,
+        "max": 1024,
+        "value": 256,
+    },
+)
+
+CaseConfigParamInput_M_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.M,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 4,
+        "max": 64,
+        "value": 16,
+    },
+)
+
+CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.ef_search,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 100,
+        "max": 1024,
+        "value": 256,
+    },
+)
+
+
 CaseConfigParamInput_maintenance_work_mem_PgVector = CaseConfigInput(
     label=CaseConfigParamType.maintenance_work_mem,
     inputHelp="Recommended value: 1.33x the index size, not to exceed the available free memory."
@@ -738,6 +769,19 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
     ],
 )
 
+CaseConfigParamInput_QuantizationType_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.quantizationType,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["none", "halfvec"],
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    in [
+        IndexType.HNSW.value,
+        IndexType.IVFFlat.value,
+    ],
+)
+
 CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
     label=CaseConfigParamType.quantizationRatio,
     inputType=InputType.Option,
@@ -826,11 +870,19 @@ ESPerformanceConfig = [
     CaseConfigParamInput_NumCandidates_ES,
 ]
 
+AWSOpensearchLoadingConfig = [CaseConfigParamInput_EFConstruction_AWSOpensearch, CaseConfigParamInput_M_AWSOpensearch]
+AWSOpenSearchPerformanceConfig = [
+    CaseConfigParamInput_EFConstruction_AWSOpensearch,
+    CaseConfigParamInput_M_AWSOpensearch,
+    CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
+]
+
 PgVectorLoadingConfig = [
     CaseConfigParamInput_IndexType_PgVector,
     CaseConfigParamInput_Lists_PgVector,
     CaseConfigParamInput_m,
     CaseConfigParamInput_EFConstruction_PgVector,
+    CaseConfigParamInput_QuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
 ]
@@ -841,6 +893,7 @@ PgVectorPerformanceConfig = [
     CaseConfigParamInput_EFSearch_PgVector,
     CaseConfigParamInput_Lists_PgVector,
     CaseConfigParamInput_Probes_PgVector,
+    CaseConfigParamInput_QuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
 ]
@@ -905,6 +958,10 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: ESLoadingConfig,
         CaseLabel.Performance: ESPerformanceConfig,
     },
+    DB.AWSOpenSearch: {
+        CaseLabel.Load: AWSOpensearchLoadingConfig,
+        CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
+    },
     DB.PgVector: {
         CaseLabel.Load: PgVectorLoadingConfig,
         CaseLabel.Performance: PgVectorPerformanceConfig,
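
Note: CASE_CONFIG_MAP is a two-level lookup, database then case label, returning the ordered widget list the frontend renders. A stripped-down sketch of the same shape (stand-in enums and strings replace the real CaseConfigInput objects):

    from enum import Enum

    class DB(Enum):
        AWSOpenSearch = "AWSOpenSearch"

    class CaseLabel(Enum):
        Load = "Load"
        Performance = "Performance"

    # Stand-ins for the CaseConfigInput lists defined above.
    CASE_CONFIG_MAP = {
        DB.AWSOpenSearch: {
            CaseLabel.Load: ["EFConstruction", "M"],
            CaseLabel.Performance: ["EFConstruction", "M", "ef_search"],
        },
    }

    inputs = CASE_CONFIG_MAP[DB.AWSOpenSearch][CaseLabel.Performance]
    print(inputs)  # ['EFConstruction', 'M', 'ef_search']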
vectordb_bench/frontend/pages/quries_per_dollar.py CHANGED
@@ -1,10 +1,17 @@
 import streamlit as st
 from vectordb_bench.frontend.components.check_results.footer import footer
-from vectordb_bench.frontend.components.check_results.expanderStyle import initMainExpanderStyle
+from vectordb_bench.frontend.components.check_results.expanderStyle import (
+    initMainExpanderStyle,
+)
 from vectordb_bench.frontend.components.check_results.priceTable import priceTable
-from vectordb_bench.frontend.components.check_results.stPageConfig import initResultsPageConfig
+from vectordb_bench.frontend.components.check_results.stPageConfig import (
+    initResultsPageConfig,
+)
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
-from vectordb_bench.frontend.components.check_results.nav import NavToResults, NavToRunTest
+from vectordb_bench.frontend.components.check_results.nav import (
+    NavToResults,
+    NavToRunTest,
+)
 from vectordb_bench.frontend.components.check_results.charts import drawMetricChart
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -16,7 +23,7 @@ from vectordb_bench.metric import QURIES_PER_DOLLAR_METRIC
 def main():
     # set page config
     initResultsPageConfig(st)
-
+
     # header
     drawHeaderIcon(st)
 
@@ -57,7 +64,8 @@ def main():
             dataWithMetric.append(d)
         if len(dataWithMetric) > 0:
             chartContainer = st.expander(caseName, True)
-            drawMetricChart(data, metric, chartContainer)
+            key = f"{caseName}-{metric}"
+            drawMetricChart(data, metric, chartContainer, key=key)
 
     # footer
     footer(st.container())
vectordb_bench/frontend/vdb_benchmark.py CHANGED
@@ -1,8 +1,13 @@
 import streamlit as st
 from vectordb_bench.frontend.components.check_results.footer import footer
-from vectordb_bench.frontend.components.check_results.stPageConfig import initResultsPageConfig
+from vectordb_bench.frontend.components.check_results.stPageConfig import (
+    initResultsPageConfig,
+)
 from vectordb_bench.frontend.components.check_results.headerIcon import drawHeaderIcon
-from vectordb_bench.frontend.components.check_results.nav import NavToQuriesPerDollar, NavToRunTest
+from vectordb_bench.frontend.components.check_results.nav import (
+    NavToQuriesPerDollar,
+    NavToRunTest,
+)
 from vectordb_bench.frontend.components.check_results.charts import drawCharts
 from vectordb_bench.frontend.components.check_results.filters import getshownData
 from vectordb_bench.frontend.components.get_results.saveAsImage import getResults
@@ -20,7 +25,10 @@ def main():
     allResults = benchMarkRunner.get_results()
 
     st.title("Vector Database Benchmark")
-    st.caption("Note that all testing was completed in July 2023, except for the times already noted.")
+    st.caption(
+        "Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
+    )
+    st.caption("All tested milvus are in _standalone_ mode.")
 
     # results selector and filter
     resultSelectorContainer = st.sidebar.container()
vectordb_bench/models.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 import pathlib
-from datetime import date
+from datetime import date, datetime
 from enum import Enum, StrEnum, auto
 from typing import List, Self
 
@@ -163,16 +163,20 @@ class TestResult(BaseModel):
     results: list[CaseResult]
 
     file_fmt: str = "result_{}_{}_{}.json"  # result_20230718_statndard_milvus.json
+    timestamp: float = 0.0
 
     def flush(self):
         db2case = self.get_db_results()
-
+        timestamp = datetime.combine(date.today(), datetime.min.time()).timestamp()
         result_root = config.RESULTS_LOCAL_DIR
         for db, result in db2case.items():
            self.write_db_file(
                result_dir=result_root.joinpath(db.value),
                partial=TestResult(
-                    run_id=self.run_id, task_label=self.task_label, results=result
+                    run_id=self.run_id,
+                    task_label=self.task_label,
+                    results=result,
+                    timestamp=timestamp,
                ),
                db=db.value.lower(),
            )
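
Note: the timestamp written by flush() is midnight of the flush day, not the exact flush time, so every result file produced on the same day carries the same value. Unpacking the expression:

    from datetime import date, datetime

    # date.today() -> today's date; datetime.min.time() -> 00:00:00.
    # combine() joins them, and .timestamp() converts to a Unix epoch
    # float in the local timezone.
    timestamp = datetime.combine(date.today(), datetime.min.time()).timestamp()
    print(timestamp)  # e.g. 1704412800.0, depending on date and timezone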