vectordb-bench 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. vectordb_bench/backend/clients/__init__.py +33 -1
  2. vectordb_bench/backend/clients/api.py +1 -1
  3. vectordb_bench/backend/clients/chroma/chroma.py +2 -2
  4. vectordb_bench/backend/clients/clickhouse/cli.py +66 -0
  5. vectordb_bench/backend/clients/clickhouse/clickhouse.py +156 -0
  6. vectordb_bench/backend/clients/clickhouse/config.py +60 -0
  7. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -1
  8. vectordb_bench/backend/clients/mariadb/cli.py +60 -45
  9. vectordb_bench/backend/clients/mariadb/config.py +11 -9
  10. vectordb_bench/backend/clients/mariadb/mariadb.py +52 -58
  11. vectordb_bench/backend/clients/milvus/cli.py +1 -19
  12. vectordb_bench/backend/clients/milvus/config.py +0 -1
  13. vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  14. vectordb_bench/backend/clients/pgvector/cli.py +1 -2
  15. vectordb_bench/backend/clients/pinecone/pinecone.py +1 -1
  16. vectordb_bench/backend/clients/qdrant_cloud/config.py +1 -9
  17. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +1 -1
  18. vectordb_bench/backend/clients/tidb/config.py +6 -9
  19. vectordb_bench/backend/clients/tidb/tidb.py +17 -18
  20. vectordb_bench/backend/clients/vespa/cli.py +47 -0
  21. vectordb_bench/backend/clients/vespa/config.py +51 -0
  22. vectordb_bench/backend/clients/vespa/util.py +15 -0
  23. vectordb_bench/backend/clients/vespa/vespa.py +249 -0
  24. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
  25. vectordb_bench/cli/cli.py +21 -17
  26. vectordb_bench/cli/vectordbbench.py +5 -1
  27. vectordb_bench/frontend/config/dbCaseConfigs.py +58 -7
  28. vectordb_bench/frontend/config/styles.py +2 -0
  29. vectordb_bench/models.py +5 -6
  30. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/METADATA +11 -3
  31. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/RECORD +35 -28
  32. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/WHEEL +1 -1
  33. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/entry_points.txt +0 -0
  34. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info/licenses}/LICENSE +0 -0
  35. {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,249 @@
1
+ import datetime
2
+ import logging
3
+ import math
4
+ from collections.abc import Generator
5
+ from contextlib import contextmanager
6
+
7
+ from vespa import application
8
+
9
+ from ..api import VectorDB
10
+ from . import util
11
+ from .config import VespaHNSWConfig
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
class Vespa(VectorDB):
    """VectorDB benchmark client backed by a Vespa application (via pyvespa).

    Constructing an instance deploys the application package over HTTP and
    waits for the application to come up. The client actually used for
    feeding and querying is created per ``init()`` context.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict[str, str],
        db_case_config: VespaHNSWConfig | None = None,
        collection_name: str = "VectorDBBenchCollection",
        drop_old: bool = False,
        **kwargs,
    ) -> None:
        """Deploy the application and optionally delete existing documents.

        Args:
            dim: dimension of the embeddings stored in the schema.
            db_config: connection settings; the keys ``"url"`` and ``"port"`` are read.
            db_case_config: HNSW / quantization settings. Defaults to ``VespaHNSWConfig()``.
            collection_name: used as the Vespa schema name.
            drop_old: when True, delete all documents of the schema after deployment.
            **kwargs: accepted for interface compatibility; not used here.
        """
        self.dim = dim
        self.db_config = db_config
        self.case_config = db_case_config or VespaHNSWConfig()
        self.schema_name = collection_name
        # Defined up front so that calling insert/search outside ``init()`` trips
        # the ``assert self.client is not None`` guard instead of AttributeError.
        self.client: application.Vespa | None = None

        client = self.deploy_http()
        client.wait_for_application_up()

        if drop_old:
            try:
                client.delete_all_docs("vectordbbench_content", self.schema_name)
            except Exception:
                # Best effort: a failed cleanup is logged but does not abort the run.
                log.exception(f"Vespa client drop_old schema: {self.schema_name}")

    @contextmanager
    def init(self) -> Generator[None, None, None]:
        """Create and destroy the connection to the database.

        Why a context manager:

            In multiprocessing search tasks, vectordbbench might init
            hundreds of thousands of connections with the DB server.

            Too many connections may drain local FDs or server connection resources.
            If the DB client doesn't have a `close()` method, just set the object to None.

        Examples:
            >>> with self.init():
            >>>     self.insert_embeddings()
        """
        self.client = application.Vespa(self.db_config["url"], port=self.db_config["port"])
        try:
            yield
        finally:
            # Drop the client even when the body of the ``with`` block raises.
            self.client = None

    def need_normalize_cosine(self) -> bool:
        """Whether this database needs the dataset normalized to support COSINE."""
        return False

    def insert_embeddings(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception | None]:
        """Insert the embeddings into the vector database.

        Args:
            embeddings(list[list[float]]): list of embeddings to add to the vector database.
            metadata(list[int]): ids associated with the embeddings, one per embedding.
            **kwargs(Any): vector database specific parameters (unused).

        Returns:
            tuple[int, Exception | None]: number of documents fed successfully and,
                when at least one feed operation failed, an error describing the failures.

        Raises:
            ValueError: if ``embeddings`` and ``metadata`` differ in length
                (raised by ``zip(..., strict=True)`` while the documents are consumed).
        """
        assert self.client is not None

        failed_ids: list[str] = []

        def _record_failure(response, doc_id: str) -> None:
            # feed_iterable reports every operation through this callback;
            # without it, rejected documents would be counted as inserted.
            if not response.is_successful():
                failed_ids.append(doc_id)

        documents = (
            {"id": str(doc_id), "fields": {"id": doc_id, "embedding": vector}}
            for doc_id, vector in zip(metadata, embeddings, strict=True)
        )
        self.client.feed_iterable(documents, schema=self.schema_name, callback=_record_failure)

        if failed_ids:
            error = RuntimeError(
                f"Vespa failed to feed {len(failed_ids)} of {len(embeddings)} documents "
                f"(first failed ids: {failed_ids[:10]})"
            )
            return len(embeddings) - len(failed_ids), error
        return len(embeddings), None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
    ) -> list[int]:
        """Get the k most similar embeddings to the query vector.

        Args:
            query(list[float]): query embedding to look up documents similar to.
            k(int): Number of most similar embeddings to return. Defaults to 100.
            filters(dict, optional): when it carries an ``"id"`` entry, only documents
                with ``id >= filters["id"]`` are searched.

        Returns:
            list[int]: ids of the hits returned by Vespa (empty when nothing matches).
        """
        assert self.client is not None

        quantization = self.case_config.quantization_type
        use_float = quantization == "none"

        # ef below k cannot shrink the candidate set; only the surplus is explored.
        extra_ef = max(0, self.case_config.ef - k)
        embedding_field = "embedding" if use_float else "embedding_binary"

        yql = (
            f"select id from {self.schema_name} where "  # noqa: S608
            f"{{targetHits: {k}, hnsw.exploreAdditionalHits: {extra_ef}}}"
            f"nearestNeighbor({embedding_field}, query_embedding)"
        )

        # Only add the clause when an id bound is really present; a filters dict
        # without "id" must not produce "id >= None".
        id_filter = filters.get("id") if filters else None
        if id_filter is not None:
            yql += f" and id >= {id_filter}"

        query_embedding = query if use_float else util.binarize_tensor(query)

        result = self.client.query(
            {
                "yql": yql,
                "input.query(query_embedding)": query_embedding,
                # The rank profiles are named after the quantization types.
                "ranking": quantization,
                # Without an explicit "hits", Vespa returns its default number of
                # results no matter how large targetHits is.
                "hits": k,
            }
        )
        # "children" is absent from the response root when there are no hits.
        children = result.get_json()["root"].get("children", [])
        return [child["fields"]["id"] for child in children]

    def optimize(self, data_size: int | None = None) -> None:
        """optimize will be called between insertion and search in performance cases.

        Should be blocked until the vectorDB is ready to be tested on
        heavy performance cases.

        Time(insert the dataset) + Time(optimize) will be recorded as "load_duration" metric
        Optimize's execution time is limited, the limited time is based on cases.

        This implementation performs no work.
        """

    @property
    def application_package(self):
        """The application package for this instance, built once and then cached."""
        if getattr(self, "_application_package", None) is None:
            self._application_package = self._create_application_package()
        return self._application_package

    def _create_application_package(self):
        """Build the ``vectordbbench`` application package for the current configuration.

        The schema always holds the ``id`` and float ``embedding`` fields. With
        binary quantization a derived ``embedding_binary`` field (bit-packed int8,
        hamming distance) is added. One rank profile exists per quantization type.
        """
        from vespa.package import (
            HNSW,
            ApplicationPackage,
            Document,
            Field,
            RankProfile,
            Schema,
            Validation,
            ValidationID,
        )

        # 8 binarized dimensions are packed into each int8 cell.
        packed_dim = math.ceil(self.dim / 8)
        float_tensor = f"tensor<float>(x[{self.dim}])"
        packed_tensor = f"tensor<int8>(x[{packed_dim}])"

        fields = [
            Field(
                "id",
                "int",
                indexing=["summary", "attribute"],
            ),
            Field(
                "embedding",
                float_tensor,
                indexing=["summary", "attribute", "index"],
                ann=HNSW(**self.case_config.index_param()),
            ),
        ]

        if self.case_config.quantization_type == "binary":
            fields.append(
                Field(
                    "embedding_binary",
                    packed_tensor,
                    indexing=[
                        "input embedding",
                        # convert 32 bit float to 1 bit
                        "binarize",
                        # pack 8 bits into one int8
                        "pack_bits",
                        "summary",
                        "attribute",
                        "index",
                    ],
                    ann=HNSW(**{**self.case_config.index_param(), "distance_metric": "hamming"}),
                    is_document_field=False,
                )
            )

        # The validation overrides below expire tomorrow.
        # NOTE(review): presumably they let a redeploy with a different dimension or
        # quantization pass Vespa's change validation -- confirm.
        tomorrow = datetime.date.today() + datetime.timedelta(days=1)

        return ApplicationPackage(
            "vectordbbench",
            [
                Schema(
                    self.schema_name,
                    Document(
                        fields,
                    ),
                    rank_profiles=[
                        # Profile names must equal the quantization_type values:
                        # search_embedding passes that value as "ranking".
                        RankProfile(
                            name="none",
                            first_phase="",
                            inherits="default",
                            inputs=[("query(query_embedding)", float_tensor)],
                        ),
                        RankProfile(
                            name="binary",
                            first_phase="",
                            inherits="default",
                            inputs=[("query(query_embedding)", packed_tensor)],
                        ),
                    ],
                )
            ],
            validations=[
                Validation(ValidationID.tensorTypeChange, until=tomorrow),
                Validation(ValidationID.fieldTypeChange, until=tomorrow),
            ],
        )

    def deploy_http(self) -> application.Vespa:
        """
        Deploy a Vespa application package via HTTP REST API.

        The zipped package is POSTed to the ``prepareandactivate`` endpoint on
        port 19071 of ``db_config["url"]``.

        Returns:
            application.Vespa: The deployed Vespa application instance

        Raises:
            RuntimeError: if the HTTP request fails or returns an error status;
                the response body is appended to the message when available.
        """
        import requests

        url = self.db_config["url"] + ":19071/application/v2/tenant/default/prepareandactivate"
        package_data = self.application_package.to_zip()
        headers = {"Content-Type": "application/zip"}

        try:
            response = requests.post(url=url, data=package_data, headers=headers, timeout=10)

            response.raise_for_status()
            result = response.json()
            return application.Vespa(
                url=self.db_config["url"],
                port=self.db_config["port"],
                deployment_message=result.get("message"),
                application_package=self.application_package,
            )

        except requests.exceptions.RequestException as e:
            error_msg = f"Failed to deploy Vespa application: {e!s}"
            if getattr(e, "response", None) is not None:
                error_msg += f" - Response: {e.response.text}"
            raise RuntimeError(error_msg) from e
@@ -99,7 +99,7 @@ class WeaviateCloud(VectorDB):
99
99
  embeddings: Iterable[list[float]],
100
100
  metadata: list[int],
101
101
  **kwargs,
102
- ) -> (int, Exception):
102
+ ) -> tuple[int, Exception]:
103
103
  """Insert embeddings into Weaviate"""
104
104
  assert self.client.schema.exists(self.collection_name)
105
105
  insert_count = 0
vectordb_bench/cli/cli.py CHANGED
@@ -1,9 +1,9 @@
1
1
  import logging
2
- import os
3
2
  import time
4
3
  from collections.abc import Callable
5
4
  from concurrent.futures import wait
6
5
  from datetime import datetime
6
+ from pathlib import Path
7
7
  from pprint import pformat
8
8
  from typing import (
9
9
  Annotated,
@@ -38,18 +38,17 @@ except ImportError:
38
38
  from yaml import Loader
39
39
 
40
40
 
41
- def click_get_defaults_from_file(ctx, param, value):
41
+ def click_get_defaults_from_file(ctx, param, value): # noqa: ANN001, ARG001
42
42
  if value:
43
- if os.path.exists(value):
44
- input_file = value
45
- else:
46
- input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
43
+ path = Path(value)
44
+ input_file = path if path.exists() else Path(config.CONFIG_LOCAL_DIR, path)
47
45
  try:
48
- with open(input_file) as f:
49
- _config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader)
46
+ with input_file.open() as f:
47
+ _config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader) # noqa: S506
50
48
  ctx.default_map = _config.get(ctx.command.name, {})
51
49
  except Exception as e:
52
- raise click.BadParameter(f"Failed to load config file: {e}")
50
+ msg = f"Failed to load config file: {e}"
51
+ raise click.BadParameter(msg) from e
53
52
  return value
54
53
 
55
54
 
@@ -68,12 +67,16 @@ def click_parameter_decorators_from_typed_dict(
68
67
 
69
68
 
70
69
  For clarity, the key names of the TypedDict will be used to determine the type hints for the input parameters.
71
- The actual function parameters are controlled by the click.option definitions. You must manually ensure these are aligned in a sensible way!
70
+ The actual function parameters are controlled by the click.option definitions.
71
+ You must manually ensure these are aligned in a sensible way!
72
72
 
73
73
  Example:
74
74
  ```
75
75
  class CommonTypedDict(TypedDict):
76
- z: Annotated[int, click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)]
76
+ z: Annotated[
77
+ int,
78
+ click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)
79
+ ]
77
80
  name: Annotated[str, click.argument("name", required=False, default="Jeff")]
78
81
 
79
82
  class FooTypedDict(CommonTypedDict):
@@ -91,14 +94,16 @@ def click_parameter_decorators_from_typed_dict(
91
94
  for _, t in get_type_hints(typed_dict, include_extras=True).items():
92
95
  assert get_origin(t) is Annotated
93
96
  if len(t.__metadata__) == 1 and t.__metadata__[0].__module__ == "click.decorators":
94
- # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]] with no additional metadata defined (len=1)
97
+ # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]]
98
+ # with no additional metadata defined (len=1)
95
99
  decorators.append(t.__metadata__[0])
96
100
  else:
97
101
  raise RuntimeError(
98
- "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring",
102
+ "Click-TypedDict decorator parsing must only contain root type "
103
+ "and a click decorator like click.option. See docstring",
99
104
  )
100
105
 
101
- def deco(f):
106
+ def deco(f): # noqa: ANN001
102
107
  for dec in reversed(decorators):
103
108
  f = dec(f)
104
109
  return f
@@ -106,7 +111,7 @@ def click_parameter_decorators_from_typed_dict(
106
111
  return deco
107
112
 
108
113
 
109
- def click_arg_split(ctx: click.Context, param: click.core.Option, value):
114
+ def click_arg_split(ctx: click.Context, param: click.core.Option, value): # noqa: ANN001, ARG001
110
115
  """Will split a comma-separated list input into an actual list.
111
116
 
112
117
  Args:
@@ -145,8 +150,7 @@ def parse_task_stages(
145
150
  return stages
146
151
 
147
152
 
148
- # ruff: noqa
149
- def check_custom_case_parameters(ctx: any, param: any, value: any):
153
+ def check_custom_case_parameters(ctx: any, param: any, value: any): # noqa: ARG001
150
154
  if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
151
155
  raise click.BadParameter(
152
156
  """ Custom case parameters
@@ -1,5 +1,6 @@
1
1
  from ..backend.clients.alloydb.cli import AlloyDBScaNN
2
2
  from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
3
+ from ..backend.clients.clickhouse.cli import Clickhouse
3
4
  from ..backend.clients.mariadb.cli import MariaDBHNSW
4
5
  from ..backend.clients.memorydb.cli import MemoryDB
5
6
  from ..backend.clients.milvus.cli import MilvusAutoIndex
@@ -9,9 +10,10 @@ from ..backend.clients.pgvector.cli import PgVectorHNSW
9
10
  from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
10
11
  from ..backend.clients.redis.cli import Redis
11
12
  from ..backend.clients.test.cli import Test
13
+ from ..backend.clients.tidb.cli import TiDB
14
+ from ..backend.clients.vespa.cli import Vespa
12
15
  from ..backend.clients.weaviate_cloud.cli import Weaviate
13
16
  from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
14
- from ..backend.clients.tidb.cli import TiDB
15
17
  from .cli import cli
16
18
 
17
19
  cli.add_command(PgVectorHNSW)
@@ -29,6 +31,8 @@ cli.add_command(PgDiskAnn)
29
31
  cli.add_command(AlloyDBScaNN)
30
32
  cli.add_command(MariaDBHNSW)
31
33
  cli.add_command(TiDB)
34
+ cli.add_command(Clickhouse)
35
+ cli.add_command(Vespa)
32
36
 
33
37
 
34
38
  if __name__ == "__main__":
@@ -1087,8 +1087,7 @@ CaseConfigParamInput_M_MariaDB = CaseConfigInput(
1087
1087
  "max": 200,
1088
1088
  "value": 6,
1089
1089
  },
1090
- isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
1091
- == IndexType.HNSW.value,
1090
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
1092
1091
  )
1093
1092
 
1094
1093
  CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
@@ -1100,8 +1099,7 @@ CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
1100
1099
  "max": 10000,
1101
1100
  "value": 20,
1102
1101
  },
1103
- isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
1104
- == IndexType.HNSW.value,
1102
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
1105
1103
  )
1106
1104
 
1107
1105
  CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
@@ -1111,10 +1109,9 @@ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
1111
1109
  inputConfig={
1112
1110
  "min": 1048576,
1113
1111
  "max": (1 << 53) - 1,
1114
- "value": 16 * 1024 ** 3,
1112
+ "value": 16 * 1024**3,
1115
1113
  },
1116
- isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
1117
- == IndexType.HNSW.value,
1114
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
1118
1115
  )
1119
1116
 
1120
1117
  CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
@@ -1137,6 +1134,47 @@ CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
1137
1134
  )
1138
1135
 
1139
1136
 
1137
+ CaseConfigParamInput_M_Vespa = CaseConfigInput(
1138
+ label=CaseConfigParamType.M,
1139
+ inputType=InputType.Number,
1140
+ inputConfig={
1141
+ "min": 4,
1142
+ "max": 64,
1143
+ "value": 16,
1144
+ },
1145
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
1146
+ )
1147
+
1148
+ CaseConfigParamInput_IndexType_Vespa = CaseConfigInput(
1149
+ label=CaseConfigParamType.IndexType,
1150
+ inputType=InputType.Option,
1151
+ inputConfig={
1152
+ "options": [
1153
+ IndexType.HNSW.value,
1154
+ ],
1155
+ },
1156
+ )
1157
+
1158
+ CaseConfigParamInput_QuantizationType_Vespa = CaseConfigInput(
1159
+ label=CaseConfigParamType.quantizationType,
1160
+ inputType=InputType.Option,
1161
+ inputConfig={
1162
+ "options": ["none", "binary"],
1163
+ },
1164
+ )
1165
+
1166
+ CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
1167
+ label=CaseConfigParamType.EFConstruction,
1168
+ inputType=InputType.Number,
1169
+ inputConfig={
1170
+ "min": 8,
1171
+ "max": 512,
1172
+ "value": 200,
1173
+ },
1174
+ isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
1175
+ )
1176
+
1177
+
1140
1178
  MilvusLoadConfig = [
1141
1179
  CaseConfigParamInput_IndexType,
1142
1180
  CaseConfigParamInput_M,
@@ -1344,6 +1382,15 @@ MariaDBPerformanceConfig = [
1344
1382
  CaseConfigParamInput_EFSearch_MariaDB,
1345
1383
  ]
1346
1384
 
1385
+ VespaLoadingConfig = [
1386
+ CaseConfigParamInput_IndexType_Vespa,
1387
+ CaseConfigParamInput_QuantizationType_Vespa,
1388
+ CaseConfigParamInput_M_Vespa,
1389
+ CaseConfigParamInput_EF_Milvus,
1390
+ CaseConfigParamInput_EFConstruction_Vespa,
1391
+ ]
1392
+ VespaPerformanceConfig = VespaLoadingConfig
1393
+
1347
1394
  CASE_CONFIG_MAP = {
1348
1395
  DB.Milvus: {
1349
1396
  CaseLabel.Load: MilvusLoadConfig,
@@ -1400,4 +1447,8 @@ CASE_CONFIG_MAP = {
1400
1447
  CaseLabel.Load: MariaDBLoadingConfig,
1401
1448
  CaseLabel.Performance: MariaDBPerformanceConfig,
1402
1449
  },
1450
+ DB.Vespa: {
1451
+ CaseLabel.Load: VespaLoadingConfig,
1452
+ CaseLabel.Performance: VespaPerformanceConfig,
1453
+ },
1403
1454
  }
@@ -48,6 +48,7 @@ DB_TO_ICON = {
48
48
  DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
49
49
  DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
50
50
  DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
51
+ DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
51
52
  }
52
53
 
53
54
  # RedisCloud color: #0D6EFD
@@ -63,4 +64,5 @@ COLOR_MAP = {
63
64
  DB.Redis.value: "#0D6EFD",
64
65
  DB.AWSOpenSearch.value: "#0DCAF0",
65
66
  DB.TiDB.value: "#0D6EFD",
67
+ DB.Vespa.value: "#61d790",
66
68
  }
vectordb_bench/models.py CHANGED
@@ -263,7 +263,6 @@ class TestResult(BaseModel):
263
263
  )
264
264
  return TestResult.validate(test_result)
265
265
 
266
- # ruff: noqa
267
266
  def display(self, dbs: list[DB] | None = None):
268
267
  filter_list = dbs if dbs and isinstance(dbs, list) else None
269
268
  sorted_results = sorted(
@@ -294,7 +293,7 @@ class TestResult(BaseModel):
294
293
  max_qps = 10 if max_qps < 10 else max_qps
295
294
  max_recall = 13 if max_recall < 13 else max_recall
296
295
 
297
- LENGTH = (
296
+ LENGTH = ( # noqa: N806
298
297
  max_db,
299
298
  max_db_labels,
300
299
  max_case,
@@ -307,13 +306,13 @@ class TestResult(BaseModel):
307
306
  5,
308
307
  )
309
308
 
310
- DATA_FORMAT = (
309
+ DATA_FORMAT = ( # noqa: N806
311
310
  f"%-{max_db}s | %-{max_db_labels}s %-{max_case}s %-{len(self.task_label)}s"
312
311
  f" | %-{max_load_dur}s %-{max_qps}s %-15s %-{max_recall}s %-14s"
313
312
  f" | %-5s"
314
313
  )
315
314
 
316
- TITLE = DATA_FORMAT % (
315
+ TITLE = DATA_FORMAT % ( # noqa: N806
317
316
  "DB",
318
317
  "db_label",
319
318
  "case",
@@ -325,8 +324,8 @@ class TestResult(BaseModel):
325
324
  "max_load_count",
326
325
  "label",
327
326
  )
328
- SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH))
329
- SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % (
327
+ SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH)) # noqa: C417, N806
328
+ SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % ( # noqa: N806
330
329
  self.run_id[:5],
331
330
  self.task_label,
332
331
  )
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: vectordb-bench
3
- Version: 0.0.23
3
+ Version: 0.0.25
4
4
  Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
5
5
  Author-email: XuanYang-cn <xuan.yang@zilliz.com>
6
6
  Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -13,7 +13,7 @@ License-File: LICENSE
13
13
  Requires-Dist: click
14
14
  Requires-Dist: pytz
15
15
  Requires-Dist: streamlit-autorefresh
16
- Requires-Dist: streamlit!=1.34.0
16
+ Requires-Dist: streamlit!=1.34.0,<1.44
17
17
  Requires-Dist: streamlit_extras
18
18
  Requires-Dist: tqdm
19
19
  Requires-Dist: s3fs
@@ -50,6 +50,8 @@ Requires-Dist: alibabacloud_ha3engine_vector; extra == "all"
50
50
  Requires-Dist: alibabacloud_searchengine20211025; extra == "all"
51
51
  Requires-Dist: mariadb; extra == "all"
52
52
  Requires-Dist: PyMySQL; extra == "all"
53
+ Requires-Dist: clickhouse-connect; extra == "all"
54
+ Requires-Dist: pyvespa; extra == "all"
53
55
  Provides-Extra: qdrant
54
56
  Requires-Dist: qdrant-client; extra == "qdrant"
55
57
  Provides-Extra: pinecone
@@ -81,6 +83,11 @@ Provides-Extra: mariadb
81
83
  Requires-Dist: mariadb; extra == "mariadb"
82
84
  Provides-Extra: tidb
83
85
  Requires-Dist: PyMySQL; extra == "tidb"
86
+ Provides-Extra: clickhouse
87
+ Requires-Dist: clickhouse-connect; extra == "clickhouse"
88
+ Provides-Extra: vespa
89
+ Requires-Dist: pyvespa; extra == "vespa"
90
+ Dynamic: license-file
84
91
 
85
92
  # VectorDBBench: A Benchmark Tool for VectorDB
86
93
 
@@ -140,6 +147,7 @@ All the database client supported
140
147
  | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
141
148
  | mongodb | `pip install vectordb-bench[mongodb]` |
142
149
  | tidb | `pip install vectordb-bench[tidb]` |
150
+ | vespa | `pip install vectordb-bench[vespa]` |
143
151
 
144
152
  ### Run
145
153