vectordb-bench 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. vectordb_bench/backend/clients/__init__.py +65 -1
  2. vectordb_bench/backend/clients/api.py +2 -1
  3. vectordb_bench/backend/clients/chroma/chroma.py +2 -2
  4. vectordb_bench/backend/clients/clickhouse/cli.py +66 -0
  5. vectordb_bench/backend/clients/clickhouse/clickhouse.py +156 -0
  6. vectordb_bench/backend/clients/clickhouse/config.py +60 -0
  7. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -1
  8. vectordb_bench/backend/clients/mariadb/cli.py +122 -0
  9. vectordb_bench/backend/clients/mariadb/config.py +73 -0
  10. vectordb_bench/backend/clients/mariadb/mariadb.py +208 -0
  11. vectordb_bench/backend/clients/milvus/cli.py +32 -0
  12. vectordb_bench/backend/clients/milvus/config.py +32 -0
  13. vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  14. vectordb_bench/backend/clients/pgvector/cli.py +14 -3
  15. vectordb_bench/backend/clients/pgvector/config.py +22 -5
  16. vectordb_bench/backend/clients/pgvector/pgvector.py +62 -19
  17. vectordb_bench/backend/clients/pinecone/pinecone.py +1 -1
  18. vectordb_bench/backend/clients/qdrant_cloud/config.py +1 -9
  19. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +1 -1
  20. vectordb_bench/backend/clients/tidb/cli.py +98 -0
  21. vectordb_bench/backend/clients/tidb/config.py +46 -0
  22. vectordb_bench/backend/clients/tidb/tidb.py +233 -0
  23. vectordb_bench/backend/clients/vespa/cli.py +47 -0
  24. vectordb_bench/backend/clients/vespa/config.py +51 -0
  25. vectordb_bench/backend/clients/vespa/util.py +15 -0
  26. vectordb_bench/backend/clients/vespa/vespa.py +249 -0
  27. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
  28. vectordb_bench/cli/cli.py +20 -17
  29. vectordb_bench/cli/vectordbbench.py +8 -0
  30. vectordb_bench/frontend/config/dbCaseConfigs.py +147 -0
  31. vectordb_bench/frontend/config/styles.py +4 -0
  32. vectordb_bench/models.py +8 -6
  33. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info}/METADATA +22 -3
  34. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info}/RECORD +38 -25
  35. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info}/WHEEL +1 -1
  36. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info}/entry_points.txt +0 -0
  37. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info/licenses}/LICENSE +0 -0
  38. {vectordb_bench-0.0.22.dist-info → vectordb_bench-0.0.24.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/vespa/vespa.py ADDED
@@ -0,0 +1,249 @@
+ import datetime
+ import logging
+ import math
+ from collections.abc import Generator
+ from contextlib import contextmanager
+
+ from vespa import application
+
+ from ..api import VectorDB
+ from . import util
+ from .config import VespaHNSWConfig
+
+ log = logging.getLogger(__name__)
+
+
+ class Vespa(VectorDB):
+     def __init__(
+         self,
+         dim: int,
+         db_config: dict[str, str],
+         db_case_config: VespaHNSWConfig | None = None,
+         collection_name: str = "VectorDBBenchCollection",
+         drop_old: bool = False,
+         **kwargs,
+     ) -> None:
+         self.dim = dim
+         self.db_config = db_config
+         self.case_config = db_case_config or VespaHNSWConfig()
+         self.schema_name = collection_name
+
+         client = self.deploy_http()
+         client.wait_for_application_up()
+
+         if drop_old:
+             try:
+                 client.delete_all_docs("vectordbbench_content", self.schema_name)
+             except Exception:
+                 drop_old = False
+                 log.exception(f"Vespa client drop_old schema: {self.schema_name}")
+
+     @contextmanager
+     def init(self) -> Generator[None, None, None]:
+         """create and destroy connections to database.
+         Why contextmanager:
+
+             In multiprocessing search tasks, vectordbbench might init
+             totally hundreds of thousands of connections with DB server.
+
+             Too many connections may drain local FDs or server connection resources.
+             If the DB client doesn't have `close()` method, just set the object to None.
+
+         Examples:
+             >>> with self.init():
+             >>>     self.insert_embeddings()
+         """
+         self.client = application.Vespa(self.db_config["url"], port=self.db_config["port"])
+         yield
+         self.client = None
+
+     def need_normalize_cosine(self) -> bool:
+         """Whether this database needs to normalize the dataset to support COSINE"""
+         return False
+
+     def insert_embeddings(
+         self,
+         embeddings: list[list[float]],
+         metadata: list[int],
+         **kwargs,
+     ) -> tuple[int, Exception | None]:
+         """Insert the embeddings to the vector database. The default number of embeddings for
+         each insert_embeddings is 5000.
+
+         Args:
+             embeddings(list[list[float]]): list of embeddings to add to the vector database.
+             metadata(list[int]): metadata associated with the embeddings, for filtering.
+             **kwargs(Any): vector database specific parameters.
+
+         Returns:
+             int: inserted data count
+         """
+         assert self.client is not None
+
+         data = ({"id": str(i), "fields": {"id": i, "embedding": e}} for i, e in zip(metadata, embeddings, strict=True))
+         self.client.feed_iterable(data, self.schema_name)
+         return len(embeddings), None
+
+     def search_embedding(
+         self,
+         query: list[float],
+         k: int = 100,
+         filters: dict | None = None,
+     ) -> list[int]:
+         """Get k most similar embeddings to query vector.
+
+         Args:
+             query(list[float]): query embedding to look up documents similar to.
+             k(int): Number of most similar embeddings to return. Defaults to 100.
+             filters(dict, optional): filtering expression to filter the data while searching.
+
+         Returns:
+             list[int]: list of k most similar embedding IDs to the query embedding.
+         """
+         assert self.client is not None
+
+         ef = self.case_config.ef
+         extra_ef = max(0, ef - k)
+         embedding_field = "embedding" if self.case_config.quantization_type == "none" else "embedding_binary"
+
+         yql = (
+             f"select id from {self.schema_name} where "  # noqa: S608
+             f"{{targetHits: {k}, hnsw.exploreAdditionalHits: {extra_ef}}}"
+             f"nearestNeighbor({embedding_field}, query_embedding)"
+         )
+
+         if filters:
+             id_filter = filters.get("id")
+             yql += f" and id >= {id_filter}"
+
+         query_embedding = query if self.case_config.quantization_type == "none" else util.binarize_tensor(query)
+
+         ranking = self.case_config.quantization_type
+
+         result = self.client.query({"yql": yql, "input.query(query_embedding)": query_embedding, "ranking": ranking})
+         return [child["fields"]["id"] for child in result.get_json()["root"]["children"]]
+
+     def optimize(self, data_size: int | None = None):
+         """optimize will be called between insertion and search in performance cases.
+
+         Should be blocked until the vectorDB is ready to be tested on
+         heavy performance cases.
+
+         Time(insert the dataset) + Time(optimize) will be recorded as "load_duration" metric
+         Optimize's execution time is limited, the limited time is based on cases.
+         """
+
+     @property
+     def application_package(self):
+         if getattr(self, "_application_package", None) is None:
+             self._application_package = self._create_application_package()
+         return self._application_package
+
+     def _create_application_package(self):
+         from vespa.package import (
+             HNSW,
+             ApplicationPackage,
+             Document,
+             Field,
+             RankProfile,
+             Schema,
+             Validation,
+             ValidationID,
+         )
+
+         fields = [
+             Field(
+                 "id",
+                 "int",
+                 indexing=["summary", "attribute"],
+             ),
+             Field(
+                 "embedding",
+                 f"tensor<float>(x[{self.dim}])",
+                 indexing=["summary", "attribute", "index"],
+                 ann=HNSW(**self.case_config.index_param()),
+             ),
+         ]
+
+         if self.case_config.quantization_type == "binary":
+             fields.append(
+                 Field(
+                     "embedding_binary",
+                     f"tensor<int8>(x[{math.ceil(self.dim / 8)}])",
+                     indexing=[
+                         "input embedding",
+                         # convert 32 bit float to 1 bit
+                         "binarize",
+                         # pack 8 bits into one int8
+                         "pack_bits",
+                         "summary",
+                         "attribute",
+                         "index",
+                     ],
+                     ann=HNSW(**{**self.case_config.index_param(), "distance_metric": "hamming"}),
+                     is_document_field=False,
+                 )
+             )
+
+         tomorrow = datetime.date.today() + datetime.timedelta(days=1)
+
+         return ApplicationPackage(
+             "vectordbbench",
+             [
+                 Schema(
+                     self.schema_name,
+                     Document(
+                         fields,
+                     ),
+                     rank_profiles=[
+                         RankProfile(
+                             name="none",
+                             first_phase="",
+                             inherits="default",
+                             inputs=[("query(query_embedding)", f"tensor<float>(x[{self.dim}])")],
+                         ),
+                         RankProfile(
+                             name="binary",
+                             first_phase="",
+                             inherits="default",
+                             inputs=[("query(query_embedding)", f"tensor<int8>(x[{math.ceil(self.dim / 8)}])")],
+                         ),
+                     ],
+                 )
+             ],
+             validations=[
+                 Validation(ValidationID.tensorTypeChange, until=tomorrow),
+                 Validation(ValidationID.fieldTypeChange, until=tomorrow),
+             ],
+         )
+
+     def deploy_http(self) -> application.Vespa:
+         """
+         Deploy a Vespa application package via HTTP REST API.
+
+         Returns:
+             application.Vespa: The deployed Vespa application instance
+         """
+         import requests
+
+         url = self.db_config["url"] + ":19071/application/v2/tenant/default/prepareandactivate"
+         package_data = self.application_package.to_zip()
+         headers = {"Content-Type": "application/zip"}
+
+         try:
+             response = requests.post(url=url, data=package_data, headers=headers, timeout=10)
+
+             response.raise_for_status()
+             result = response.json()
+             return application.Vespa(
+                 url=self.db_config["url"],
+                 port=self.db_config["port"],
+                 deployment_message=result.get("message"),
+                 application_package=self.application_package,
+             )
+
+         except requests.exceptions.RequestException as e:
+             error_msg = f"Failed to deploy Vespa application: {e!s}"
+             if hasattr(e, "response") and e.response is not None:
+                 error_msg += f" - Response: {e.response.text}"
+             raise RuntimeError(error_msg) from e
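For orientation, a minimal usage sketch of the new Vespa client follows. It relies only on the constructor and method signatures added above; the endpoint URL/port, the toy 3-dimensional vectors, and the default VespaHNSWConfig are illustrative assumptions, not values taken from this release.

# Hypothetical driver for the new Vespa client (not part of the diff).
# Assumes a local Vespa config server on :19071 (used by deploy_http) and a
# query endpoint matching db_config["url"] / db_config["port"].
from vectordb_bench.backend.clients.vespa.config import VespaHNSWConfig
from vectordb_bench.backend.clients.vespa.vespa import Vespa

client = Vespa(
    dim=3,  # toy dimension, for illustration only
    db_config={"url": "http://localhost", "port": 8080},
    db_case_config=VespaHNSWConfig(),  # defaults; quantization_type "none" assumed
    drop_old=True,
)

with client.init():
    client.insert_embeddings(
        embeddings=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]],
        metadata=[1, 2],
    )
    ids = client.search_embedding(query=[0.1, 0.2, 0.3], k=2)
    print(ids)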
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py CHANGED
@@ -99,7 +99,7 @@ class WeaviateCloud(VectorDB):
          embeddings: Iterable[list[float]],
          metadata: list[int],
          **kwargs,
-     ) -> (int, Exception):
+     ) -> tuple[int, Exception]:
          """Insert embeddings into Weaviate"""
          assert self.client.schema.exists(self.collection_name)
          insert_count = 0
vectordb_bench/cli/cli.py CHANGED
@@ -1,9 +1,9 @@
  import logging
- import os
  import time
  from collections.abc import Callable
  from concurrent.futures import wait
  from datetime import datetime
+ from pathlib import Path
  from pprint import pformat
  from typing import (
      Annotated,
@@ -38,18 +38,16 @@ except ImportError:
      from yaml import Loader


- def click_get_defaults_from_file(ctx, param, value):
+ def click_get_defaults_from_file(ctx, param, value):  # noqa: ANN001, ARG001
      if value:
-         if os.path.exists(value):
-             input_file = value
-         else:
-             input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
+         input_file = value if Path.exists(value) else Path.join(config.CONFIG_LOCAL_DIR, value)
          try:
-             with open(input_file) as f:
-                 _config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader)
+             with Path.open(input_file) as f:
+                 _config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader)  # noqa: S506
              ctx.default_map = _config.get(ctx.command.name, {})
          except Exception as e:
-             raise click.BadParameter(f"Failed to load config file: {e}")
+             msg = f"Failed to load config file: {e}"
+             raise click.BadParameter(msg) from e
      return value


@@ -68,12 +66,16 @@ def click_parameter_decorators_from_typed_dict(


      For clarity, the key names of the TypedDict will be used to determine the type hints for the input parameters.
-     The actual function parameters are controlled by the click.option definitions. You must manually ensure these are aligned in a sensible way!
+     The actual function parameters are controlled by the click.option definitions.
+     You must manually ensure these are aligned in a sensible way!

      Example:
      ```
      class CommonTypedDict(TypedDict):
-         z: Annotated[int, click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)]
+         z: Annotated[
+             int,
+             click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)
+         ]
          name: Annotated[str, click.argument("name", required=False, default="Jeff")]

      class FooTypedDict(CommonTypedDict):
@@ -91,14 +93,16 @@ def click_parameter_decorators_from_typed_dict(
      for _, t in get_type_hints(typed_dict, include_extras=True).items():
          assert get_origin(t) is Annotated
          if len(t.__metadata__) == 1 and t.__metadata__[0].__module__ == "click.decorators":
-             # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]] with no additional metadata defined (len=1)
+             # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]]
+             # with no additional metadata defined (len=1)
              decorators.append(t.__metadata__[0])
          else:
              raise RuntimeError(
-                 "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring",
+                 "Click-TypedDict decorator parsing must only contain root type "
+                 "and a click decorator like click.option. See docstring",
              )

-     def deco(f):
+     def deco(f):  # noqa: ANN001
          for dec in reversed(decorators):
              f = dec(f)
          return f
@@ -106,7 +110,7 @@ def click_parameter_decorators_from_typed_dict(
      return deco


- def click_arg_split(ctx: click.Context, param: click.core.Option, value):
+ def click_arg_split(ctx: click.Context, param: click.core.Option, value):  # noqa: ANN001, ARG001
      """Will split a comma-separated list input into an actual list.

      Args:
@@ -145,8 +149,7 @@ def parse_task_stages(
      return stages


- # ruff: noqa
- def check_custom_case_parameters(ctx: any, param: any, value: any):
+ def check_custom_case_parameters(ctx: any, param: any, value: any):  # noqa: ARG001
      if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
          raise click.BadParameter(
              """ Custom case parameters
vectordb_bench/cli/vectordbbench.py CHANGED
@@ -1,5 +1,7 @@
  from ..backend.clients.alloydb.cli import AlloyDBScaNN
  from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
+ from ..backend.clients.clickhouse.cli import Clickhouse
+ from ..backend.clients.mariadb.cli import MariaDBHNSW
  from ..backend.clients.memorydb.cli import MemoryDB
  from ..backend.clients.milvus.cli import MilvusAutoIndex
  from ..backend.clients.pgdiskann.cli import PgDiskAnn
@@ -8,6 +10,8 @@ from ..backend.clients.pgvector.cli import PgVectorHNSW
  from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
  from ..backend.clients.redis.cli import Redis
  from ..backend.clients.test.cli import Test
+ from ..backend.clients.tidb.cli import TiDB
+ from ..backend.clients.vespa.cli import Vespa
  from ..backend.clients.weaviate_cloud.cli import Weaviate
  from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
  from .cli import cli
@@ -25,6 +29,10 @@ cli.add_command(AWSOpenSearch)
  cli.add_command(PgVectorScaleDiskAnn)
  cli.add_command(PgDiskAnn)
  cli.add_command(AlloyDBScaNN)
+ cli.add_command(MariaDBHNSW)
+ cli.add_command(TiDB)
+ cli.add_command(Clickhouse)
+ cli.add_command(Vespa)


  if __name__ == "__main__":
vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED
@@ -173,6 +173,7 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
              IndexType.GPU_IVF_FLAT.value,
              IndexType.GPU_IVF_PQ.value,
              IndexType.GPU_CAGRA.value,
+             IndexType.GPU_BRUTE_FORCE.value,
          ],
      },
  )
@@ -562,6 +563,7 @@ CaseConfigParamInput_Nlist = CaseConfigInput(
          IndexType.IVFSQ8.value,
          IndexType.GPU_IVF_FLAT.value,
          IndexType.GPU_IVF_PQ.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )

@@ -579,6 +581,7 @@ CaseConfigParamInput_Nprobe = CaseConfigInput(
          IndexType.IVFSQ8.value,
          IndexType.GPU_IVF_FLAT.value,
          IndexType.GPU_IVF_PQ.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )

@@ -703,6 +706,7 @@ CaseConfigParamInput_cache_dataset_on_device = CaseConfigInput(
          IndexType.GPU_CAGRA.value,
          IndexType.GPU_IVF_PQ.value,
          IndexType.GPU_IVF_FLAT.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )

@@ -720,6 +724,7 @@ CaseConfigParamInput_refine_ratio = CaseConfigInput(
          IndexType.GPU_CAGRA.value,
          IndexType.GPU_IVF_PQ.value,
          IndexType.GPU_IVF_FLAT.value,
+         IndexType.GPU_BRUTE_FORCE.value,
      ],
  )

@@ -818,6 +823,19 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
      ],
  )

+ CaseConfigParamInput_TableQuantizationType_PgVector = CaseConfigInput(
+     label=CaseConfigParamType.tableQuantizationType,
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["none", "bit", "halfvec"],
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+     in [
+         IndexType.HNSW.value,
+         IndexType.IVFFlat.value,
+     ],
+ )
+
  CaseConfigParamInput_max_parallel_workers_PgVectorRS = CaseConfigInput(
      label=CaseConfigParamType.max_parallel_workers,
      displayLabel="Max parallel workers",
@@ -1040,6 +1058,61 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
      },
  )

+ CaseConfigParamInput_IndexType_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.IndexType,
+     inputHelp="Select Index Type",
+     inputType=InputType.Option,
+     inputConfig={
+         "options": [
+             IndexType.HNSW.value,
+         ],
+     },
+ )
+
+ CaseConfigParamInput_StorageEngine_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.storage_engine,
+     inputHelp="Select Storage Engine",
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["InnoDB", "MyISAM"],
+     },
+ )
+
+ CaseConfigParamInput_M_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.M,
+     inputHelp="M parameter in MHNSW vector indexing",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 3,
+         "max": 200,
+         "value": 6,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.ef_search,
+     inputHelp="mhnsw_ef_search",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 1,
+         "max": 10000,
+         "value": 20,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
+     label=CaseConfigParamType.max_cache_size,
+     inputHelp="mhnsw_max_cache_size",
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 1048576,
+         "max": (1 << 53) - 1,
+         "value": 16 * 1024**3,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
+ )

  CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
      label=CaseConfigParamType.mongodb_quantization_type,
@@ -1061,6 +1134,47 @@ CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
  )


+ CaseConfigParamInput_M_Vespa = CaseConfigInput(
+     label=CaseConfigParamType.M,
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 4,
+         "max": 64,
+         "value": 16,
+     },
+     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
+ )
+
+ CaseConfigParamInput_IndexType_Vespa = CaseConfigInput(
+     label=CaseConfigParamType.IndexType,
+     inputType=InputType.Option,
+     inputConfig={
+         "options": [
+             IndexType.HNSW.value,
+         ],
+     },
+ )
+
+ CaseConfigParamInput_QuantizationType_Vespa = CaseConfigInput(
+     label=CaseConfigParamType.quantizationType,
+     inputType=InputType.Option,
+     inputConfig={
+         "options": ["none", "binary"],
+     },
+ )
+
+ CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
+     label=CaseConfigParamType.EFConstruction,
+     inputType=InputType.Number,
+     inputConfig={
+         "min": 8,
+         "max": 512,
+         "value": 200,
+     },
+     isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
+ )
+
+
  MilvusLoadConfig = [
      CaseConfigParamInput_IndexType,
      CaseConfigParamInput_M,
@@ -1133,6 +1247,7 @@ PgVectorLoadingConfig = [
      CaseConfigParamInput_m,
      CaseConfigParamInput_EFConstruction_PgVector,
      CaseConfigParamInput_QuantizationType_PgVector,
+     CaseConfigParamInput_TableQuantizationType_PgVector,
      CaseConfigParamInput_maintenance_work_mem_PgVector,
      CaseConfigParamInput_max_parallel_workers_PgVector,
  ]
@@ -1144,6 +1259,7 @@ PgVectorPerformanceConfig = [
      CaseConfigParamInput_Lists_PgVector,
      CaseConfigParamInput_Probes_PgVector,
      CaseConfigParamInput_QuantizationType_PgVector,
+     CaseConfigParamInput_TableQuantizationType_PgVector,
      CaseConfigParamInput_maintenance_work_mem_PgVector,
      CaseConfigParamInput_max_parallel_workers_PgVector,
      CaseConfigParamInput_reranking_PgVector,
@@ -1252,6 +1368,29 @@ MongoDBPerformanceConfig = [
      CaseConfigParamInput_MongoDBNumCandidatesRatio,
  ]

+ MariaDBLoadingConfig = [
+     CaseConfigParamInput_IndexType_MariaDB,
+     CaseConfigParamInput_StorageEngine_MariaDB,
+     CaseConfigParamInput_M_MariaDB,
+     CaseConfigParamInput_CacheSize_MariaDB,
+ ]
+ MariaDBPerformanceConfig = [
+     CaseConfigParamInput_IndexType_MariaDB,
+     CaseConfigParamInput_StorageEngine_MariaDB,
+     CaseConfigParamInput_M_MariaDB,
+     CaseConfigParamInput_CacheSize_MariaDB,
+     CaseConfigParamInput_EFSearch_MariaDB,
+ ]
+
+ VespaLoadingConfig = [
+     CaseConfigParamInput_IndexType_Vespa,
+     CaseConfigParamInput_QuantizationType_Vespa,
+     CaseConfigParamInput_M_Vespa,
+     CaseConfigParamInput_EF_Milvus,
+     CaseConfigParamInput_EFConstruction_Vespa,
+ ]
+ VespaPerformanceConfig = VespaLoadingConfig
+
  CASE_CONFIG_MAP = {
      DB.Milvus: {
          CaseLabel.Load: MilvusLoadConfig,
@@ -1304,4 +1443,12 @@ CASE_CONFIG_MAP = {
          CaseLabel.Load: MongoDBLoadingConfig,
          CaseLabel.Performance: MongoDBPerformanceConfig,
      },
+     DB.MariaDB: {
+         CaseLabel.Load: MariaDBLoadingConfig,
+         CaseLabel.Performance: MariaDBPerformanceConfig,
+     },
+     DB.Vespa: {
+         CaseLabel.Load: VespaLoadingConfig,
+         CaseLabel.Performance: VespaPerformanceConfig,
+     },
  }
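CASE_CONFIG_MAP is what the frontend uses to decide which CaseConfigInput widgets to render for a given database and case label, so the new MariaDB and Vespa entries plug straight into that lookup. A small sketch of the lookup follows; the empty-dict/empty-list fallbacks are assumptions, while the attribute names come from the CaseConfigInput fields shown above.

# Sketch of resolving the new Vespa entry from CASE_CONFIG_MAP (illustrative only).
# DB and CaseLabel are referenced by dbCaseConfigs itself, so they are assumed
# importable from that module's namespace.
from vectordb_bench.frontend.config.dbCaseConfigs import CASE_CONFIG_MAP, CaseLabel, DB

vespa_perf_inputs = CASE_CONFIG_MAP.get(DB.Vespa, {}).get(CaseLabel.Performance, [])
for case_input in vespa_perf_inputs:
    # each CaseConfigInput carries a label, an input type, and its widget config
    print(case_input.label, case_input.inputType, case_input.inputConfig)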
vectordb_bench/frontend/config/styles.py CHANGED
@@ -47,6 +47,8 @@ DB_TO_ICON = {
      DB.Redis: "https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png",
      DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
      DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
+     DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
+     DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
  }

  # RedisCloud color: #0D6EFD
@@ -61,4 +63,6 @@ COLOR_MAP = {
      DB.PgVector.value: "#4C779A",
      DB.Redis.value: "#0D6EFD",
      DB.AWSOpenSearch.value: "#0DCAF0",
+     DB.TiDB.value: "#0D6EFD",
+     DB.Vespa.value: "#61d790",
  }
vectordb_bench/models.py CHANGED
@@ -49,6 +49,7 @@ class CaseConfigParamType(Enum):
      probes = "probes"
      quantizationType = "quantization_type"
      quantizationRatio = "quantization_ratio"
+     tableQuantizationType = "table_quantization_type"
      reranking = "reranking"
      rerankingMetric = "reranking_metric"
      quantizedFetchLimit = "quantized_fetch_limit"
@@ -87,6 +88,8 @@ class CaseConfigParamType(Enum):
      preReorderingNumNeigbors = "pre_reordering_num_neighbors"
      numSearchThreads = "num_search_threads"
      maxNumPrefetchDatasets = "max_num_prefetch_datasets"
+     storage_engine = "storage_engine"
+     max_cache_size = "max_cache_size"

      # mongodb params
      mongodb_quantization_type = "quantization"
@@ -260,7 +263,6 @@ class TestResult(BaseModel):
          )
          return TestResult.validate(test_result)

-     # ruff: noqa
      def display(self, dbs: list[DB] | None = None):
          filter_list = dbs if dbs and isinstance(dbs, list) else None
          sorted_results = sorted(
@@ -291,7 +293,7 @@ class TestResult(BaseModel):
          max_qps = 10 if max_qps < 10 else max_qps
          max_recall = 13 if max_recall < 13 else max_recall

-         LENGTH = (
+         LENGTH = (  # noqa: N806
              max_db,
              max_db_labels,
              max_case,
@@ -304,13 +306,13 @@ class TestResult(BaseModel):
              5,
          )

-         DATA_FORMAT = (
+         DATA_FORMAT = (  # noqa: N806
              f"%-{max_db}s | %-{max_db_labels}s %-{max_case}s %-{len(self.task_label)}s"
              f" | %-{max_load_dur}s %-{max_qps}s %-15s %-{max_recall}s %-14s"
              f" | %-5s"
          )

-         TITLE = DATA_FORMAT % (
+         TITLE = DATA_FORMAT % (  # noqa: N806
              "DB",
              "db_label",
              "case",
@@ -322,8 +324,8 @@ class TestResult(BaseModel):
              "max_load_count",
              "label",
          )
-         SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH))
-         SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % (
+         SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH))  # noqa: C417, N806
+         SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % (  # noqa: N806
              self.run_id[:5],
              self.task_label,
          )
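The # noqa: N806 suppressions above keep the uppercase LENGTH / DATA_FORMAT / TITLE / SPLIT locals that display() builds its summary table from. A standalone sketch of that formatting technique, with made-up widths and values:

# Column widths are computed first, baked into a printf-style format string,
# then reused for the header row, the separator row, and every data row.
max_db, max_case, max_qps = 10, 18, 8  # made-up widths

DATA_FORMAT = f"%-{max_db}s | %-{max_case}s | %-{max_qps}s"  # noqa: N806
TITLE = DATA_FORMAT % ("DB", "case", "qps")  # noqa: N806
SPLIT = DATA_FORMAT % tuple("-" * x for x in (max_db, max_case, max_qps))  # noqa: N806

print(TITLE)
print(SPLIT)
print(DATA_FORMAT % ("Milvus", "Performance768D1M", "1234.5"))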