vectordb-bench 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +33 -1
- vectordb_bench/backend/clients/api.py +1 -1
- vectordb_bench/backend/clients/chroma/chroma.py +2 -2
- vectordb_bench/backend/clients/clickhouse/cli.py +66 -0
- vectordb_bench/backend/clients/clickhouse/clickhouse.py +156 -0
- vectordb_bench/backend/clients/clickhouse/config.py +60 -0
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -1
- vectordb_bench/backend/clients/mariadb/cli.py +60 -45
- vectordb_bench/backend/clients/mariadb/config.py +11 -9
- vectordb_bench/backend/clients/mariadb/mariadb.py +52 -58
- vectordb_bench/backend/clients/milvus/cli.py +1 -19
- vectordb_bench/backend/clients/milvus/config.py +0 -1
- vectordb_bench/backend/clients/milvus/milvus.py +1 -1
- vectordb_bench/backend/clients/pgvector/cli.py +1 -2
- vectordb_bench/backend/clients/pinecone/pinecone.py +1 -1
- vectordb_bench/backend/clients/qdrant_cloud/config.py +1 -9
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +1 -1
- vectordb_bench/backend/clients/tidb/config.py +6 -9
- vectordb_bench/backend/clients/tidb/tidb.py +17 -18
- vectordb_bench/backend/clients/vespa/cli.py +47 -0
- vectordb_bench/backend/clients/vespa/config.py +51 -0
- vectordb_bench/backend/clients/vespa/util.py +15 -0
- vectordb_bench/backend/clients/vespa/vespa.py +249 -0
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -1
- vectordb_bench/cli/cli.py +21 -17
- vectordb_bench/cli/vectordbbench.py +5 -1
- vectordb_bench/frontend/config/dbCaseConfigs.py +58 -7
- vectordb_bench/frontend/config/styles.py +2 -0
- vectordb_bench/models.py +5 -6
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/METADATA +11 -3
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/RECORD +35 -28
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info/licenses}/LICENSE +0 -0
- {vectordb_bench-0.0.23.dist-info → vectordb_bench-0.0.25.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,249 @@
|
|
1
|
+
import datetime
|
2
|
+
import logging
|
3
|
+
import math
|
4
|
+
from collections.abc import Generator
|
5
|
+
from contextlib import contextmanager
|
6
|
+
|
7
|
+
from vespa import application
|
8
|
+
|
9
|
+
from ..api import VectorDB
|
10
|
+
from . import util
|
11
|
+
from .config import VespaHNSWConfig
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class Vespa(VectorDB):
    """VectorDBBench client that benchmarks a Vespa application over HTTP.

    On construction the client builds an application package (one schema with
    an ``id`` field and an HNSW-indexed ``embedding`` tensor, plus an optional
    bit-packed ``embedding_binary`` field) and deploys it through the
    ``prepareandactivate`` endpoint. Feeding and querying use a pyvespa
    ``application.Vespa`` handle that only exists inside ``init()``.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict[str, str],
        db_case_config: VespaHNSWConfig | None = None,
        collection_name: str = "VectorDBBenchCollection",
        drop_old: bool = False,
        **kwargs,
    ) -> None:
        """Deploy the application package and optionally wipe existing documents.

        Args:
            dim: dimensionality of the embeddings stored in the schema.
            db_config: connection settings; the ``"url"`` and ``"port"`` keys are read.
            db_case_config: HNSW / quantization settings; defaults to ``VespaHNSWConfig()``.
            collection_name: used as the Vespa schema name.
            drop_old: when true, delete every document of the schema after deployment.
            **kwargs: accepted for interface compatibility; unused here.
        """
        self.dim = dim
        self.db_config = db_config
        self.case_config = db_case_config or VespaHNSWConfig()
        self.schema_name = collection_name
        # Feed/query handle; populated only while inside ``init()``. Defining it
        # here lets the ``assert self.client is not None`` guards fail cleanly
        # instead of raising AttributeError when ``init()`` was never entered.
        self.client = None

        client = self.deploy_http()
        client.wait_for_application_up()

        if drop_old:
            try:
                # "vectordbbench_content" is passed as the content cluster name;
                # presumably derived from the application name "vectordbbench".
                client.delete_all_docs("vectordbbench_content", self.schema_name)
            except Exception:
                # Best effort: a failed cleanup is logged but does not abort the run.
                log.exception(f"Vespa client drop_old schema: {self.schema_name}")

    @contextmanager
    def init(self) -> Generator[None, None, None]:
        """Create and destroy the connection to the database.

        Why contextmanager:

            In multiprocessing search tasks, vectordbbench might init
            totally hundreds of thousands of connections with DB server.

            Too many connections may drain local FDs or server connection resources.
            If the DB client doesn't have `close()` method, just set the object to None.

        Examples:
            >>> with self.init():
            >>>     self.insert_embeddings()
        """
        self.client = application.Vespa(self.db_config["url"], port=self.db_config["port"])
        try:
            yield
        finally:
            # Always drop the handle, even when the body of the ``with`` raised.
            self.client = None

    def need_normalize_cosine(self) -> bool:
        """Whether this database needs the dataset normalized to support COSINE."""
        return False

    def insert_embeddings(
        self,
        embeddings: list[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception | None]:
        """Insert the embeddings to the vector database. The default number of embeddings for
        each insert_embeddings is 5000.

        Args:
            embeddings(list[list[float]]): list of embedding to add to the vector database.
            metadata(list[int]): ids associated with the embeddings; used as document ids.
            **kwargs(Any): vector database specific parameters.

        Returns:
            tuple[int, Exception | None]: number of documents fed successfully and
            an error describing the failures, or ``None`` when every document was accepted.
        """
        assert self.client is not None

        failed_ids: list[str] = []

        def _record_failure(response, doc_id: str) -> None:
            # feed_iterable reports per-document results through this callback;
            # without it, rejected documents would go unnoticed.
            if not response.is_successful():
                failed_ids.append(doc_id)

        docs = (
            {"id": str(doc_id), "fields": {"id": doc_id, "embedding": vector}}
            for doc_id, vector in zip(metadata, embeddings, strict=True)
        )
        self.client.feed_iterable(docs, self.schema_name, callback=_record_failure)

        if failed_ids:
            error = RuntimeError(
                f"Vespa failed to feed {len(failed_ids)} of {len(embeddings)} documents "
                f"into schema {self.schema_name}",
            )
            log.warning(str(error))
            return len(embeddings) - len(failed_ids), error
        return len(embeddings), None

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
    ) -> list[int]:
        """Get k most similar embeddings to query vector.

        Args:
            query(list[float]): query embedding to look up documents similar to.
            k(int): Number of most similar embeddings to return. Defaults to 100.
            filters(dict, optional): filtering expression to filter the data while searching.
                Only the ``"id"`` key is used, as a lower bound on the document id.

        Returns:
            list[int]: list of k most similar embeddings IDs to the query embedding.
        """
        assert self.client is not None

        quantization = self.case_config.quantization_type
        use_float = quantization == "none"

        # Nodes explored beyond targetHits; never negative when ef < k.
        extra_ef = max(0, self.case_config.ef - k)
        embedding_field = "embedding" if use_float else "embedding_binary"

        yql = (
            f"select id from {self.schema_name} where "  # noqa: S608
            f"{{targetHits: {k}, hnsw.exploreAdditionalHits: {extra_ef}}}"
            f"nearestNeighbor({embedding_field}, query_embedding)"
        )

        if filters:
            id_filter = filters.get("id")
            # Skip the clause when no id bound is supplied rather than
            # emitting the invalid expression "id >= None".
            if id_filter is not None:
                yql += f" and id >= {id_filter}"

        query_embedding = query if use_float else util.binarize_tensor(query)

        result = self.client.query(
            {
                "yql": yql,
                "input.query(query_embedding)": query_embedding,
                # The rank profile names match the quantization type names.
                "ranking": quantization,
                # Vespa returns only 10 hits unless "hits" is given explicitly.
                "hits": k,
            },
        )
        # "children" is absent from the response root when nothing matched.
        children = result.get_json()["root"].get("children", [])
        return [child["fields"]["id"] for child in children]

    def optimize(self, data_size: int | None = None):
        """optimize will be called between insertion and search in performance cases.

        Should be blocked until the vectorDB is ready to be tested on
        heavy performance cases.

        Time(insert the dataset) + Time(optimize) will be recorded as "load_duration" metric
        Optimize's execution time is limited, the limited time is based on cases.

        This implementation performs no work.
        """

    @property
    def application_package(self):
        """Application package for this schema, built on first access and cached."""
        if getattr(self, "_application_package", None) is None:
            self._application_package = self._create_application_package()
        return self._application_package

    def _create_application_package(self):
        """Build the ``ApplicationPackage`` describing the benchmark schema.

        The schema always has ``id`` and a float ``embedding`` tensor with an
        HNSW index. With ``quantization_type == "binary"`` an additional
        ``embedding_binary`` int8 tensor is derived from ``embedding`` and
        indexed with hamming distance.
        """
        from vespa.package import (
            HNSW,
            ApplicationPackage,
            Document,
            Field,
            RankProfile,
            Schema,
            Validation,
            ValidationID,
        )

        index_params = self.case_config.index_param()
        # Eight 1-bit values are packed into each int8 cell.
        packed_dim = math.ceil(self.dim / 8)

        fields = [
            Field(
                "id",
                "int",
                indexing=["summary", "attribute"],
            ),
            Field(
                "embedding",
                f"tensor<float>(x[{self.dim}])",
                indexing=["summary", "attribute", "index"],
                ann=HNSW(**index_params),
            ),
        ]

        if self.case_config.quantization_type == "binary":
            fields.append(
                Field(
                    "embedding_binary",
                    f"tensor<int8>(x[{packed_dim}])",
                    indexing=[
                        "input embedding",
                        # convert 32 bit float to 1 bit
                        "binarize",
                        # pack 8 bits into one int8
                        "pack_bits",
                        "summary",
                        "attribute",
                        "index",
                    ],
                    ann=HNSW(**{**index_params, "distance_metric": "hamming"}),
                    is_document_field=False,
                )
            )

        # The validation overrides below are only valid until this date.
        tomorrow = datetime.date.today() + datetime.timedelta(days=1)

        return ApplicationPackage(
            "vectordbbench",
            [
                Schema(
                    self.schema_name,
                    Document(
                        fields,
                    ),
                    rank_profiles=[
                        RankProfile(
                            name="none",
                            first_phase="",
                            inherits="default",
                            inputs=[("query(query_embedding)", f"tensor<float>(x[{self.dim}])")],
                        ),
                        RankProfile(
                            name="binary",
                            first_phase="",
                            inherits="default",
                            inputs=[("query(query_embedding)", f"tensor<int8>(x[{packed_dim}])")],
                        ),
                    ],
                )
            ],
            validations=[
                Validation(ValidationID.tensorTypeChange, until=tomorrow),
                Validation(ValidationID.fieldTypeChange, until=tomorrow),
            ],
        )

    def deploy_http(self) -> application.Vespa:
        """
        Deploy a Vespa application package via HTTP REST API.

        The zipped package is POSTed to port 19071 of ``db_config["url"]``.

        Returns:
            application.Vespa: The deployed Vespa application instance

        Raises:
            RuntimeError: if the deployment request fails; the server's response
                body is appended to the message when one is available.
        """
        import requests

        url = self.db_config["url"] + ":19071/application/v2/tenant/default/prepareandactivate"
        package_data = self.application_package.to_zip()
        headers = {"Content-Type": "application/zip"}

        try:
            response = requests.post(url=url, data=package_data, headers=headers, timeout=10)

            response.raise_for_status()
            result = response.json()
            return application.Vespa(
                url=self.db_config["url"],
                port=self.db_config["port"],
                deployment_message=result.get("message"),
                application_package=self.application_package,
            )

        except requests.exceptions.RequestException as e:
            error_msg = f"Failed to deploy Vespa application: {e!s}"
            failed_response = getattr(e, "response", None)
            if failed_response is not None:
                error_msg += f" - Response: {failed_response.text}"
            raise RuntimeError(error_msg) from e
|
@@ -99,7 +99,7 @@ class WeaviateCloud(VectorDB):
|
|
99
99
|
embeddings: Iterable[list[float]],
|
100
100
|
metadata: list[int],
|
101
101
|
**kwargs,
|
102
|
-
) ->
|
102
|
+
) -> tuple[int, Exception]:
|
103
103
|
"""Insert embeddings into Weaviate"""
|
104
104
|
assert self.client.schema.exists(self.collection_name)
|
105
105
|
insert_count = 0
|
vectordb_bench/cli/cli.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
import logging
|
2
|
-
import os
|
3
2
|
import time
|
4
3
|
from collections.abc import Callable
|
5
4
|
from concurrent.futures import wait
|
6
5
|
from datetime import datetime
|
6
|
+
from pathlib import Path
|
7
7
|
from pprint import pformat
|
8
8
|
from typing import (
|
9
9
|
Annotated,
|
@@ -38,18 +38,17 @@ except ImportError:
|
|
38
38
|
from yaml import Loader
|
39
39
|
|
40
40
|
|
41
|
-
def click_get_defaults_from_file(ctx, param, value):
|
41
|
+
def click_get_defaults_from_file(ctx, param, value): # noqa: ANN001, ARG001
|
42
42
|
if value:
|
43
|
-
|
44
|
-
|
45
|
-
else:
|
46
|
-
input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
|
43
|
+
path = Path(value)
|
44
|
+
input_file = path if path.exists() else Path(config.CONFIG_LOCAL_DIR, path)
|
47
45
|
try:
|
48
|
-
with open(
|
49
|
-
_config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader)
|
46
|
+
with input_file.open() as f:
|
47
|
+
_config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader) # noqa: S506
|
50
48
|
ctx.default_map = _config.get(ctx.command.name, {})
|
51
49
|
except Exception as e:
|
52
|
-
|
50
|
+
msg = f"Failed to load config file: {e}"
|
51
|
+
raise click.BadParameter(msg) from e
|
53
52
|
return value
|
54
53
|
|
55
54
|
|
@@ -68,12 +67,16 @@ def click_parameter_decorators_from_typed_dict(
|
|
68
67
|
|
69
68
|
|
70
69
|
For clarity, the key names of the TypedDict will be used to determine the type hints for the input parameters.
|
71
|
-
The actual function parameters are controlled by the click.option definitions.
|
70
|
+
The actual function parameters are controlled by the click.option definitions.
|
71
|
+
You must manually ensure these are aligned in a sensible way!
|
72
72
|
|
73
73
|
Example:
|
74
74
|
```
|
75
75
|
class CommonTypedDict(TypedDict):
|
76
|
-
z: Annotated[
|
76
|
+
z: Annotated[
|
77
|
+
int,
|
78
|
+
click.option("--z/--no-z", is_flag=True, type=bool, help="help z", default=True, show_default=True)
|
79
|
+
]
|
77
80
|
name: Annotated[str, click.argument("name", required=False, default="Jeff")]
|
78
81
|
|
79
82
|
class FooTypedDict(CommonTypedDict):
|
@@ -91,14 +94,16 @@ def click_parameter_decorators_from_typed_dict(
|
|
91
94
|
for _, t in get_type_hints(typed_dict, include_extras=True).items():
|
92
95
|
assert get_origin(t) is Annotated
|
93
96
|
if len(t.__metadata__) == 1 and t.__metadata__[0].__module__ == "click.decorators":
|
94
|
-
# happy path -- only accept Annotated[..., Union[click.option,click.argument,...]]
|
97
|
+
# happy path -- only accept Annotated[..., Union[click.option,click.argument,...]]
|
98
|
+
# with no additional metadata defined (len=1)
|
95
99
|
decorators.append(t.__metadata__[0])
|
96
100
|
else:
|
97
101
|
raise RuntimeError(
|
98
|
-
"Click-TypedDict decorator parsing must only contain root type
|
102
|
+
"Click-TypedDict decorator parsing must only contain root type "
|
103
|
+
"and a click decorator like click.option. See docstring",
|
99
104
|
)
|
100
105
|
|
101
|
-
def deco(f):
|
106
|
+
def deco(f): # noqa: ANN001
|
102
107
|
for dec in reversed(decorators):
|
103
108
|
f = dec(f)
|
104
109
|
return f
|
@@ -106,7 +111,7 @@ def click_parameter_decorators_from_typed_dict(
|
|
106
111
|
return deco
|
107
112
|
|
108
113
|
|
109
|
-
def click_arg_split(ctx: click.Context, param: click.core.Option, value):
|
114
|
+
def click_arg_split(ctx: click.Context, param: click.core.Option, value): # noqa: ANN001, ARG001
|
110
115
|
"""Will split a comma-separated list input into an actual list.
|
111
116
|
|
112
117
|
Args:
|
@@ -145,8 +150,7 @@ def parse_task_stages(
|
|
145
150
|
return stages
|
146
151
|
|
147
152
|
|
148
|
-
#
|
149
|
-
def check_custom_case_parameters(ctx: any, param: any, value: any):
|
153
|
+
def check_custom_case_parameters(ctx: any, param: any, value: any): # noqa: ARG001
|
150
154
|
if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
|
151
155
|
raise click.BadParameter(
|
152
156
|
""" Custom case parameters
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from ..backend.clients.alloydb.cli import AlloyDBScaNN
|
2
2
|
from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
|
3
|
+
from ..backend.clients.clickhouse.cli import Clickhouse
|
3
4
|
from ..backend.clients.mariadb.cli import MariaDBHNSW
|
4
5
|
from ..backend.clients.memorydb.cli import MemoryDB
|
5
6
|
from ..backend.clients.milvus.cli import MilvusAutoIndex
|
@@ -9,9 +10,10 @@ from ..backend.clients.pgvector.cli import PgVectorHNSW
|
|
9
10
|
from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
|
10
11
|
from ..backend.clients.redis.cli import Redis
|
11
12
|
from ..backend.clients.test.cli import Test
|
13
|
+
from ..backend.clients.tidb.cli import TiDB
|
14
|
+
from ..backend.clients.vespa.cli import Vespa
|
12
15
|
from ..backend.clients.weaviate_cloud.cli import Weaviate
|
13
16
|
from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
|
14
|
-
from ..backend.clients.tidb.cli import TiDB
|
15
17
|
from .cli import cli
|
16
18
|
|
17
19
|
cli.add_command(PgVectorHNSW)
|
@@ -29,6 +31,8 @@ cli.add_command(PgDiskAnn)
|
|
29
31
|
cli.add_command(AlloyDBScaNN)
|
30
32
|
cli.add_command(MariaDBHNSW)
|
31
33
|
cli.add_command(TiDB)
|
34
|
+
cli.add_command(Clickhouse)
|
35
|
+
cli.add_command(Vespa)
|
32
36
|
|
33
37
|
|
34
38
|
if __name__ == "__main__":
|
@@ -1087,8 +1087,7 @@ CaseConfigParamInput_M_MariaDB = CaseConfigInput(
|
|
1087
1087
|
"max": 200,
|
1088
1088
|
"value": 6,
|
1089
1089
|
},
|
1090
|
-
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
1091
|
-
== IndexType.HNSW.value,
|
1090
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1092
1091
|
)
|
1093
1092
|
|
1094
1093
|
CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
|
@@ -1100,8 +1099,7 @@ CaseConfigParamInput_EFSearch_MariaDB = CaseConfigInput(
|
|
1100
1099
|
"max": 10000,
|
1101
1100
|
"value": 20,
|
1102
1101
|
},
|
1103
|
-
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
1104
|
-
== IndexType.HNSW.value,
|
1102
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1105
1103
|
)
|
1106
1104
|
|
1107
1105
|
CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
|
@@ -1111,10 +1109,9 @@ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
|
|
1111
1109
|
inputConfig={
|
1112
1110
|
"min": 1048576,
|
1113
1111
|
"max": (1 << 53) - 1,
|
1114
|
-
"value": 16 * 1024
|
1112
|
+
"value": 16 * 1024**3,
|
1115
1113
|
},
|
1116
|
-
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
1117
|
-
== IndexType.HNSW.value,
|
1114
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1118
1115
|
)
|
1119
1116
|
|
1120
1117
|
CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
|
@@ -1137,6 +1134,47 @@ CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
|
|
1137
1134
|
)
|
1138
1135
|
|
1139
1136
|
|
1137
|
+
# HNSW "M" parameter for Vespa; shown only when the HNSW index type is selected.
CaseConfigParamInput_M_Vespa = CaseConfigInput(
    label=CaseConfigParamType.M,
    inputType=InputType.Number,
    inputConfig={
        "min": 4,
        "max": 64,
        "value": 16,
    },
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
)

# Index type selector for Vespa; HNSW is the only option offered.
CaseConfigParamInput_IndexType_Vespa = CaseConfigInput(
    label=CaseConfigParamType.IndexType,
    inputType=InputType.Option,
    inputConfig={
        "options": [
            IndexType.HNSW.value,
        ],
    },
)

# Quantization selector for Vespa: "none" or "binary".
CaseConfigParamInput_QuantizationType_Vespa = CaseConfigInput(
    label=CaseConfigParamType.quantizationType,
    inputType=InputType.Option,
    inputConfig={
        "options": ["none", "binary"],
    },
)

# HNSW efConstruction parameter for Vespa. Uses ``config.get`` like the M input
# above so that a config without an IndexType entry hides the field instead of
# raising KeyError.
CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
    label=CaseConfigParamType.EFConstruction,
    inputType=InputType.Number,
    inputConfig={
        "min": 8,
        "max": 512,
        "value": 200,
    },
    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
)
|
1176
|
+
|
1177
|
+
|
1140
1178
|
MilvusLoadConfig = [
|
1141
1179
|
CaseConfigParamInput_IndexType,
|
1142
1180
|
CaseConfigParamInput_M,
|
@@ -1344,6 +1382,15 @@ MariaDBPerformanceConfig = [
|
|
1344
1382
|
CaseConfigParamInput_EFSearch_MariaDB,
|
1345
1383
|
]
|
1346
1384
|
|
1385
|
+
# Case-config inputs shown for Vespa load cases. The search-time "ef" input
# is the one already defined for Milvus.
VespaLoadingConfig = [
    CaseConfigParamInput_IndexType_Vespa,
    CaseConfigParamInput_QuantizationType_Vespa,
    CaseConfigParamInput_M_Vespa,
    CaseConfigParamInput_EF_Milvus,
    CaseConfigParamInput_EFConstruction_Vespa,
]
# Performance cases expose exactly the same inputs (same list object).
VespaPerformanceConfig = VespaLoadingConfig
|
1393
|
+
|
1347
1394
|
CASE_CONFIG_MAP = {
|
1348
1395
|
DB.Milvus: {
|
1349
1396
|
CaseLabel.Load: MilvusLoadConfig,
|
@@ -1400,4 +1447,8 @@ CASE_CONFIG_MAP = {
|
|
1400
1447
|
CaseLabel.Load: MariaDBLoadingConfig,
|
1401
1448
|
CaseLabel.Performance: MariaDBPerformanceConfig,
|
1402
1449
|
},
|
1450
|
+
DB.Vespa: {
|
1451
|
+
CaseLabel.Load: VespaLoadingConfig,
|
1452
|
+
CaseLabel.Performance: VespaPerformanceConfig,
|
1453
|
+
},
|
1403
1454
|
}
|
@@ -48,6 +48,7 @@ DB_TO_ICON = {
|
|
48
48
|
DB.Chroma: "https://assets.zilliz.com/chroma_ceb3f06ed7.png",
|
49
49
|
DB.AWSOpenSearch: "https://assets.zilliz.com/opensearch_1eee37584e.jpeg",
|
50
50
|
DB.TiDB: "https://img2.pingcap.com/forms/3/d/3d7fd5f9767323d6f037795704211ac44b4923d6.png",
|
51
|
+
DB.Vespa: "https://vespa.ai/vespa-content/uploads/2025/01/Vespa-symbol-green-rgb.png.webp",
|
51
52
|
}
|
52
53
|
|
53
54
|
# RedisCloud color: #0D6EFD
|
@@ -63,4 +64,5 @@ COLOR_MAP = {
|
|
63
64
|
DB.Redis.value: "#0D6EFD",
|
64
65
|
DB.AWSOpenSearch.value: "#0DCAF0",
|
65
66
|
DB.TiDB.value: "#0D6EFD",
|
67
|
+
DB.Vespa.value: "#61d790",
|
66
68
|
}
|
vectordb_bench/models.py
CHANGED
@@ -263,7 +263,6 @@ class TestResult(BaseModel):
|
|
263
263
|
)
|
264
264
|
return TestResult.validate(test_result)
|
265
265
|
|
266
|
-
# ruff: noqa
|
267
266
|
def display(self, dbs: list[DB] | None = None):
|
268
267
|
filter_list = dbs if dbs and isinstance(dbs, list) else None
|
269
268
|
sorted_results = sorted(
|
@@ -294,7 +293,7 @@ class TestResult(BaseModel):
|
|
294
293
|
max_qps = 10 if max_qps < 10 else max_qps
|
295
294
|
max_recall = 13 if max_recall < 13 else max_recall
|
296
295
|
|
297
|
-
LENGTH = (
|
296
|
+
LENGTH = ( # noqa: N806
|
298
297
|
max_db,
|
299
298
|
max_db_labels,
|
300
299
|
max_case,
|
@@ -307,13 +306,13 @@ class TestResult(BaseModel):
|
|
307
306
|
5,
|
308
307
|
)
|
309
308
|
|
310
|
-
DATA_FORMAT = (
|
309
|
+
DATA_FORMAT = ( # noqa: N806
|
311
310
|
f"%-{max_db}s | %-{max_db_labels}s %-{max_case}s %-{len(self.task_label)}s"
|
312
311
|
f" | %-{max_load_dur}s %-{max_qps}s %-15s %-{max_recall}s %-14s"
|
313
312
|
f" | %-5s"
|
314
313
|
)
|
315
314
|
|
316
|
-
TITLE = DATA_FORMAT % (
|
315
|
+
TITLE = DATA_FORMAT % ( # noqa: N806
|
317
316
|
"DB",
|
318
317
|
"db_label",
|
319
318
|
"case",
|
@@ -325,8 +324,8 @@ class TestResult(BaseModel):
|
|
325
324
|
"max_load_count",
|
326
325
|
"label",
|
327
326
|
)
|
328
|
-
SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH))
|
329
|
-
SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % (
|
327
|
+
SPLIT = DATA_FORMAT % tuple(map(lambda x: "-" * x, LENGTH)) # noqa: C417, N806
|
328
|
+
SUMMARY_FORMAT = ("Task summary: run_id=%s, task_label=%s") % ( # noqa: N806
|
330
329
|
self.run_id[:5],
|
331
330
|
self.task_label,
|
332
331
|
)
|
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: vectordb-bench
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.25
|
4
4
|
Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
|
5
5
|
Author-email: XuanYang-cn <xuan.yang@zilliz.com>
|
6
6
|
Project-URL: repository, https://github.com/zilliztech/VectorDBBench
|
@@ -13,7 +13,7 @@ License-File: LICENSE
|
|
13
13
|
Requires-Dist: click
|
14
14
|
Requires-Dist: pytz
|
15
15
|
Requires-Dist: streamlit-autorefresh
|
16
|
-
Requires-Dist: streamlit!=1.34.0
|
16
|
+
Requires-Dist: streamlit!=1.34.0,<1.44
|
17
17
|
Requires-Dist: streamlit_extras
|
18
18
|
Requires-Dist: tqdm
|
19
19
|
Requires-Dist: s3fs
|
@@ -50,6 +50,8 @@ Requires-Dist: alibabacloud_ha3engine_vector; extra == "all"
|
|
50
50
|
Requires-Dist: alibabacloud_searchengine20211025; extra == "all"
|
51
51
|
Requires-Dist: mariadb; extra == "all"
|
52
52
|
Requires-Dist: PyMySQL; extra == "all"
|
53
|
+
Requires-Dist: clickhouse-connect; extra == "all"
|
54
|
+
Requires-Dist: pyvespa; extra == "all"
|
53
55
|
Provides-Extra: qdrant
|
54
56
|
Requires-Dist: qdrant-client; extra == "qdrant"
|
55
57
|
Provides-Extra: pinecone
|
@@ -81,6 +83,11 @@ Provides-Extra: mariadb
|
|
81
83
|
Requires-Dist: mariadb; extra == "mariadb"
|
82
84
|
Provides-Extra: tidb
|
83
85
|
Requires-Dist: PyMySQL; extra == "tidb"
|
86
|
+
Provides-Extra: clickhouse
|
87
|
+
Requires-Dist: clickhouse-connect; extra == "clickhouse"
|
88
|
+
Provides-Extra: vespa
|
89
|
+
Requires-Dist: pyvespa; extra == "vespa"
|
90
|
+
Dynamic: license-file
|
84
91
|
|
85
92
|
# VectorDBBench: A Benchmark Tool for VectorDB
|
86
93
|
|
@@ -140,6 +147,7 @@ All the database client supported
|
|
140
147
|
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
|
141
148
|
| mongodb | `pip install vectordb-bench[mongodb]` |
|
142
149
|
| tidb | `pip install vectordb-bench[tidb]` |
|
150
|
+
| vespa | `pip install vectordb-bench[vespa]` |
|
143
151
|
|
144
152
|
### Run
|
145
153
|
|