vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +56 -46
- vectordb_bench/backend/clients/__init__.py +101 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
- vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- vectordb_bench/backend/clients/alloydb/cli.py +52 -35
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +8 -9
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- vectordb_bench/backend/clients/milvus/cli.py +62 -80
- vectordb_bench/backend/clients/milvus/config.py +31 -7
- vectordb_bench/backend/clients/milvus/milvus.py +23 -26
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +94 -58
- vectordb_bench/backend/clients/test/cli.py +1 -2
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +4 -5
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +30 -18
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench/backend/runner/rate_runner.py +51 -23
- vectordb_bench/backend/runner/read_write_runner.py +140 -46
- vectordb_bench/backend/runner/serial_runner.py +99 -50
- vectordb_bench/backend/runner/util.py +4 -19
- vectordb_bench/backend/task_runner.py +95 -74
- vectordb_bench/backend/utils.py +17 -9
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +56 -26
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
- vectordb_bench-0.0.20.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.18.dist-info/RECORD +0 -131
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ from typing import Annotated, TypedDict, Unpack
|
|
3
3
|
import click
|
4
4
|
from pydantic import SecretStr
|
5
5
|
|
6
|
+
from vectordb_bench.backend.clients import DB
|
6
7
|
from vectordb_bench.cli.cli import (
|
7
8
|
CommonTypedDict,
|
8
9
|
HNSWFlavor3,
|
@@ -10,33 +11,41 @@ from vectordb_bench.cli.cli import (
|
|
10
11
|
cli,
|
11
12
|
click_parameter_decorators_from_typed_dict,
|
12
13
|
run,
|
13
|
-
|
14
14
|
)
|
15
|
-
from vectordb_bench.backend.clients import DB
|
16
15
|
|
17
16
|
DBTYPE = DB.Milvus
|
18
17
|
|
19
18
|
|
20
19
|
class MilvusTypedDict(TypedDict):
|
21
20
|
uri: Annotated[
|
22
|
-
str,
|
21
|
+
str,
|
22
|
+
click.option("--uri", type=str, help="uri connection string", required=True),
|
23
|
+
]
|
24
|
+
user_name: Annotated[
|
25
|
+
str | None,
|
26
|
+
click.option("--user-name", type=str, help="Db username", required=False),
|
27
|
+
]
|
28
|
+
password: Annotated[
|
29
|
+
str | None,
|
30
|
+
click.option("--password", type=str, help="Db password", required=False),
|
23
31
|
]
|
24
32
|
|
25
33
|
|
26
|
-
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
|
27
|
-
...
|
34
|
+
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
|
28
35
|
|
29
36
|
|
30
37
|
@cli.command()
|
31
38
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
32
39
|
def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
33
|
-
from .config import
|
40
|
+
from .config import AutoIndexConfig, MilvusConfig
|
34
41
|
|
35
42
|
run(
|
36
43
|
db=DBTYPE,
|
37
44
|
db_config=MilvusConfig(
|
38
45
|
db_label=parameters["db_label"],
|
39
46
|
uri=SecretStr(parameters["uri"]),
|
47
|
+
user=parameters["user_name"],
|
48
|
+
password=SecretStr(parameters["password"]),
|
40
49
|
),
|
41
50
|
db_case_config=AutoIndexConfig(),
|
42
51
|
**parameters,
|
@@ -46,33 +55,36 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
|
46
55
|
@cli.command()
|
47
56
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
48
57
|
def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
49
|
-
from .config import
|
58
|
+
from .config import FLATConfig, MilvusConfig
|
50
59
|
|
51
60
|
run(
|
52
61
|
db=DBTYPE,
|
53
62
|
db_config=MilvusConfig(
|
54
63
|
db_label=parameters["db_label"],
|
55
64
|
uri=SecretStr(parameters["uri"]),
|
65
|
+
user=parameters["user_name"],
|
66
|
+
password=SecretStr(parameters["password"]),
|
56
67
|
),
|
57
68
|
db_case_config=FLATConfig(),
|
58
69
|
**parameters,
|
59
70
|
)
|
60
71
|
|
61
72
|
|
62
|
-
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
|
63
|
-
...
|
73
|
+
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
|
64
74
|
|
65
75
|
|
66
76
|
@cli.command()
|
67
77
|
@click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
|
68
78
|
def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
69
|
-
from .config import
|
79
|
+
from .config import HNSWConfig, MilvusConfig
|
70
80
|
|
71
81
|
run(
|
72
82
|
db=DBTYPE,
|
73
83
|
db_config=MilvusConfig(
|
74
84
|
db_label=parameters["db_label"],
|
75
85
|
uri=SecretStr(parameters["uri"]),
|
86
|
+
user=parameters["user_name"],
|
87
|
+
password=SecretStr(parameters["password"]) if parameters["password"] else None,
|
76
88
|
),
|
77
89
|
db_case_config=HNSWConfig(
|
78
90
|
M=parameters["m"],
|
@@ -83,20 +95,21 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
|
83
95
|
)
|
84
96
|
|
85
97
|
|
86
|
-
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
|
87
|
-
...
|
98
|
+
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
|
88
99
|
|
89
100
|
|
90
101
|
@cli.command()
|
91
102
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
92
103
|
def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
93
|
-
from .config import
|
104
|
+
from .config import IVFFlatConfig, MilvusConfig
|
94
105
|
|
95
106
|
run(
|
96
107
|
db=DBTYPE,
|
97
108
|
db_config=MilvusConfig(
|
98
109
|
db_label=parameters["db_label"],
|
99
110
|
uri=SecretStr(parameters["uri"]),
|
111
|
+
user=parameters["user_name"],
|
112
|
+
password=SecretStr(parameters["password"]),
|
100
113
|
),
|
101
114
|
db_case_config=IVFFlatConfig(
|
102
115
|
nlist=parameters["nlist"],
|
@@ -109,13 +122,15 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
109
122
|
@cli.command()
|
110
123
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
111
124
|
def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
112
|
-
from .config import
|
125
|
+
from .config import IVFSQ8Config, MilvusConfig
|
113
126
|
|
114
127
|
run(
|
115
128
|
db=DBTYPE,
|
116
129
|
db_config=MilvusConfig(
|
117
130
|
db_label=parameters["db_label"],
|
118
131
|
uri=SecretStr(parameters["uri"]),
|
132
|
+
user=parameters["user_name"],
|
133
|
+
password=SecretStr(parameters["password"]),
|
119
134
|
),
|
120
135
|
db_case_config=IVFSQ8Config(
|
121
136
|
nlist=parameters["nlist"],
|
@@ -126,23 +141,21 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
126
141
|
|
127
142
|
|
128
143
|
class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
|
129
|
-
search_list: Annotated[
|
130
|
-
str, click.option("--search-list",
|
131
|
-
type=int,
|
132
|
-
required=True)
|
133
|
-
]
|
144
|
+
search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
|
134
145
|
|
135
146
|
|
136
147
|
@cli.command()
|
137
148
|
@click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
|
138
149
|
def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
139
|
-
from .config import
|
150
|
+
from .config import DISKANNConfig, MilvusConfig
|
140
151
|
|
141
152
|
run(
|
142
153
|
db=DBTYPE,
|
143
154
|
db_config=MilvusConfig(
|
144
155
|
db_label=parameters["db_label"],
|
145
156
|
uri=SecretStr(parameters["uri"]),
|
157
|
+
user=parameters["user_name"],
|
158
|
+
password=SecretStr(parameters["password"]),
|
146
159
|
),
|
147
160
|
db_case_config=DISKANNConfig(
|
148
161
|
search_list=parameters["search_list"],
|
@@ -153,27 +166,24 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
|
153
166
|
|
154
167
|
class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
|
155
168
|
cache_dataset_on_device: Annotated[
|
156
|
-
str,
|
157
|
-
|
158
|
-
required=True)
|
159
|
-
]
|
160
|
-
refine_ratio: Annotated[
|
161
|
-
str, click.option("--refine-ratio",
|
162
|
-
type=float,
|
163
|
-
required=True)
|
169
|
+
str,
|
170
|
+
click.option("--cache-dataset-on-device", type=str, required=True),
|
164
171
|
]
|
172
|
+
refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
|
165
173
|
|
166
174
|
|
167
175
|
@cli.command()
|
168
176
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
|
169
177
|
def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
170
|
-
from .config import
|
178
|
+
from .config import GPUIVFFlatConfig, MilvusConfig
|
171
179
|
|
172
180
|
run(
|
173
181
|
db=DBTYPE,
|
174
182
|
db_config=MilvusConfig(
|
175
183
|
db_label=parameters["db_label"],
|
176
184
|
uri=SecretStr(parameters["uri"]),
|
185
|
+
user=parameters["user_name"],
|
186
|
+
password=SecretStr(parameters["password"]),
|
177
187
|
),
|
178
188
|
db_case_config=GPUIVFFlatConfig(
|
179
189
|
nlist=parameters["nlist"],
|
@@ -185,29 +195,28 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
|
185
195
|
)
|
186
196
|
|
187
197
|
|
188
|
-
class MilvusGPUIVFPQTypedDict(
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
type=int,
|
197
|
-
required=True)
|
198
|
-
]
|
198
|
+
class MilvusGPUIVFPQTypedDict(
|
199
|
+
CommonTypedDict,
|
200
|
+
MilvusTypedDict,
|
201
|
+
MilvusIVFFlatTypedDict,
|
202
|
+
MilvusGPUIVFTypedDict,
|
203
|
+
):
|
204
|
+
m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
|
205
|
+
nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
|
199
206
|
|
200
207
|
|
201
208
|
@cli.command()
|
202
209
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
|
203
210
|
def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
204
|
-
from .config import
|
211
|
+
from .config import GPUIVFPQConfig, MilvusConfig
|
205
212
|
|
206
213
|
run(
|
207
214
|
db=DBTYPE,
|
208
215
|
db_config=MilvusConfig(
|
209
216
|
db_label=parameters["db_label"],
|
210
217
|
uri=SecretStr(parameters["uri"]),
|
218
|
+
user=parameters["user_name"],
|
219
|
+
password=SecretStr(parameters["password"]),
|
211
220
|
),
|
212
221
|
db_case_config=GPUIVFPQConfig(
|
213
222
|
nlist=parameters["nlist"],
|
@@ -223,57 +232,30 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
|
223
232
|
|
224
233
|
class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
|
225
234
|
intermediate_graph_degree: Annotated[
|
226
|
-
str,
|
227
|
-
|
228
|
-
required=True)
|
229
|
-
]
|
230
|
-
graph_degree: Annotated[
|
231
|
-
str, click.option("--graph-degree",
|
232
|
-
type=int,
|
233
|
-
required=True)
|
234
|
-
]
|
235
|
-
build_algo: Annotated[
|
236
|
-
str, click.option("--build_algo",
|
237
|
-
type=str,
|
238
|
-
required=True)
|
239
|
-
]
|
240
|
-
team_size: Annotated[
|
241
|
-
str, click.option("--team-size",
|
242
|
-
type=int,
|
243
|
-
required=True)
|
244
|
-
]
|
245
|
-
search_width: Annotated[
|
246
|
-
str, click.option("--search-width",
|
247
|
-
type=int,
|
248
|
-
required=True)
|
249
|
-
]
|
250
|
-
itopk_size: Annotated[
|
251
|
-
str, click.option("--itopk-size",
|
252
|
-
type=int,
|
253
|
-
required=True)
|
254
|
-
]
|
255
|
-
min_iterations: Annotated[
|
256
|
-
str, click.option("--min-iterations",
|
257
|
-
type=int,
|
258
|
-
required=True)
|
259
|
-
]
|
260
|
-
max_iterations: Annotated[
|
261
|
-
str, click.option("--max-iterations",
|
262
|
-
type=int,
|
263
|
-
required=True)
|
235
|
+
str,
|
236
|
+
click.option("--intermediate-graph-degree", type=int, required=True),
|
264
237
|
]
|
238
|
+
graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
|
239
|
+
build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
|
240
|
+
team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
|
241
|
+
search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
|
242
|
+
itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
|
243
|
+
min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
|
244
|
+
max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
|
265
245
|
|
266
246
|
|
267
247
|
@cli.command()
|
268
248
|
@click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
|
269
249
|
def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
|
270
|
-
from .config import
|
250
|
+
from .config import GPUCAGRAConfig, MilvusConfig
|
271
251
|
|
272
252
|
run(
|
273
253
|
db=DBTYPE,
|
274
254
|
db_config=MilvusConfig(
|
275
255
|
db_label=parameters["db_label"],
|
276
256
|
uri=SecretStr(parameters["uri"]),
|
257
|
+
user=parameters["user_name"],
|
258
|
+
password=SecretStr(parameters["password"]),
|
277
259
|
),
|
278
260
|
db_case_config=GPUCAGRAConfig(
|
279
261
|
intermediate_graph_degree=parameters["intermediate_graph_degree"],
|
@@ -1,12 +1,31 @@
|
|
1
|
-
from pydantic import BaseModel, SecretStr
|
2
|
-
|
1
|
+
from pydantic import BaseModel, SecretStr, validator
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
3
4
|
|
4
5
|
|
5
6
|
class MilvusConfig(DBConfig):
|
6
7
|
uri: SecretStr = "http://localhost:19530"
|
8
|
+
user: str | None = None
|
9
|
+
password: SecretStr | None = None
|
7
10
|
|
8
11
|
def to_dict(self) -> dict:
|
9
|
-
return {
|
12
|
+
return {
|
13
|
+
"uri": self.uri.get_secret_value(),
|
14
|
+
"user": self.user if self.user else None,
|
15
|
+
"password": self.password.get_secret_value() if self.password else None,
|
16
|
+
}
|
17
|
+
|
18
|
+
@validator("*")
|
19
|
+
def not_empty_field(cls, v: any, field: any):
|
20
|
+
if (
|
21
|
+
field.name in cls.common_short_configs()
|
22
|
+
or field.name in cls.common_long_configs()
|
23
|
+
or field.name in ["user", "password"]
|
24
|
+
):
|
25
|
+
return v
|
26
|
+
if isinstance(v, str | SecretStr) and len(v) == 0:
|
27
|
+
raise ValueError("Empty string!")
|
28
|
+
return v
|
10
29
|
|
11
30
|
|
12
31
|
class MilvusIndexConfig(BaseModel):
|
@@ -14,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
|
|
14
33
|
|
15
34
|
index: IndexType
|
16
35
|
metric_type: MetricType | None = None
|
17
|
-
|
36
|
+
|
18
37
|
@property
|
19
38
|
def is_gpu_index(self) -> bool:
|
20
|
-
return self.index in [
|
39
|
+
return self.index in [
|
40
|
+
IndexType.GPU_CAGRA,
|
41
|
+
IndexType.GPU_IVF_FLAT,
|
42
|
+
IndexType.GPU_IVF_PQ,
|
43
|
+
]
|
21
44
|
|
22
45
|
def parse_metric(self) -> str:
|
23
46
|
if not self.metric_type:
|
@@ -99,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
|
|
99
122
|
"metric_type": self.parse_metric(),
|
100
123
|
"params": {"nprobe": self.nprobe},
|
101
124
|
}
|
102
|
-
|
125
|
+
|
126
|
+
|
103
127
|
class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
|
104
128
|
nlist: int
|
105
129
|
nprobe: int | None = None
|
@@ -196,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
|
|
196
220
|
search_width: int = 4
|
197
221
|
min_iterations: int = 0
|
198
222
|
max_iterations: int = 0
|
199
|
-
build_algo: str = "IVF_PQ"
|
223
|
+
build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
|
200
224
|
cache_dataset_on_device: str
|
201
225
|
refine_ratio: float | None = None
|
202
226
|
index: IndexType = IndexType.GPU_CAGRA
|
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
+
from collections.abc import Iterable
|
5
6
|
from contextlib import contextmanager
|
6
|
-
from typing import Iterable
|
7
7
|
|
8
|
-
from pymilvus import Collection, utility
|
9
|
-
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
|
8
|
+
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
|
10
9
|
|
11
|
-
from ..api import VectorDB
|
10
|
+
from ..api import VectorDB
|
12
11
|
from .config import MilvusIndexConfig
|
13
12
|
|
14
|
-
|
15
13
|
log = logging.getLogger(__name__)
|
16
14
|
|
17
|
-
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
|
15
|
+
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
|
16
|
+
|
18
17
|
|
19
18
|
class Milvus(VectorDB):
|
20
19
|
def __init__(
|
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
|
|
32
31
|
self.db_config = db_config
|
33
32
|
self.case_config = db_case_config
|
34
33
|
self.collection_name = collection_name
|
35
|
-
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
|
34
|
+
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
|
36
35
|
|
37
36
|
self._primary_field = "pk"
|
38
37
|
self._scalar_field = "id"
|
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
|
|
40
39
|
self._index_name = "vector_idx"
|
41
40
|
|
42
41
|
from pymilvus import connections
|
42
|
+
|
43
43
|
connections.connect(**self.db_config, timeout=30)
|
44
44
|
if drop_old and utility.has_collection(self.collection_name):
|
45
45
|
log.info(f"{self.name} client drop_old collection: {self.collection_name}")
|
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
|
|
49
49
|
fields = [
|
50
50
|
FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
|
51
51
|
FieldSchema(self._scalar_field, DataType.INT64),
|
52
|
-
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
|
52
|
+
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
|
53
53
|
]
|
54
54
|
|
55
55
|
log.info(f"{self.name} create collection: {self.collection_name}")
|
@@ -66,8 +66,7 @@ class Milvus(VectorDB):
|
|
66
66
|
self.case_config.index_param(),
|
67
67
|
index_name=self._index_name,
|
68
68
|
)
|
69
|
-
|
70
|
-
self._pre_load(col)
|
69
|
+
col.load()
|
71
70
|
|
72
71
|
connections.disconnect("default")
|
73
72
|
|
@@ -80,6 +79,7 @@ class Milvus(VectorDB):
|
|
80
79
|
>>> self.search_embedding()
|
81
80
|
"""
|
82
81
|
from pymilvus import connections
|
82
|
+
|
83
83
|
self.col: Collection | None = None
|
84
84
|
|
85
85
|
connections.connect(**self.db_config, timeout=60)
|
@@ -90,16 +90,15 @@ class Milvus(VectorDB):
|
|
90
90
|
connections.disconnect("default")
|
91
91
|
|
92
92
|
def _optimize(self):
|
93
|
-
self._post_insert()
|
94
93
|
log.info(f"{self.name} optimizing before search")
|
94
|
+
self._post_insert()
|
95
95
|
try:
|
96
|
-
self.col.load()
|
96
|
+
self.col.load(refresh=True)
|
97
97
|
except Exception as e:
|
98
98
|
log.warning(f"{self.name} optimize error: {e}")
|
99
99
|
raise e from None
|
100
100
|
|
101
101
|
def _post_insert(self):
|
102
|
-
log.info(f"{self.name} post insert before optimize")
|
103
102
|
try:
|
104
103
|
self.col.flush()
|
105
104
|
# wait for index done and load refresh
|
@@ -110,6 +109,7 @@ class Milvus(VectorDB):
|
|
110
109
|
)
|
111
110
|
|
112
111
|
utility.wait_for_index_building_complete(self.collection_name)
|
112
|
+
|
113
113
|
def wait_index():
|
114
114
|
while True:
|
115
115
|
progress = utility.index_building_progress(self.collection_name)
|
@@ -122,18 +122,17 @@ class Milvus(VectorDB):
|
|
122
122
|
# Skip compaction if use GPU indexType
|
123
123
|
if self.case_config.is_gpu_index:
|
124
124
|
log.debug("skip compaction for gpu index type.")
|
125
|
-
else
|
125
|
+
else:
|
126
126
|
try:
|
127
127
|
self.col.compact()
|
128
128
|
self.col.wait_for_compaction_completed()
|
129
129
|
except Exception as e:
|
130
130
|
log.warning(f"{self.name} compact error: {e}")
|
131
|
-
if hasattr(e,
|
132
|
-
if e.code().name ==
|
133
|
-
log.warning(
|
134
|
-
pass
|
131
|
+
if hasattr(e, "code"):
|
132
|
+
if e.code().name == "PERMISSION_DENIED":
|
133
|
+
log.warning("Skip compact due to permission denied.")
|
135
134
|
else:
|
136
|
-
raise e
|
135
|
+
raise e from e
|
137
136
|
wait_index()
|
138
137
|
except Exception as e:
|
139
138
|
log.warning(f"{self.name} optimize error: {e}")
|
@@ -158,7 +157,6 @@ class Milvus(VectorDB):
|
|
158
157
|
log.warning(f"{self.name} pre load error: {e}")
|
159
158
|
raise e from None
|
160
159
|
|
161
|
-
|
162
160
|
def optimize(self):
|
163
161
|
assert self.col, "Please call self.init() before"
|
164
162
|
self._optimize()
|
@@ -166,7 +164,7 @@ class Milvus(VectorDB):
|
|
166
164
|
def need_normalize_cosine(self) -> bool:
|
167
165
|
"""Wheather this database need to normalize dataset to support COSINE"""
|
168
166
|
if self.case_config.is_gpu_index:
|
169
|
-
log.info(
|
167
|
+
log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
|
170
168
|
return True
|
171
169
|
|
172
170
|
return False
|
@@ -186,9 +184,9 @@ class Milvus(VectorDB):
|
|
186
184
|
for batch_start_offset in range(0, len(embeddings), self.batch_size):
|
187
185
|
batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
|
188
186
|
insert_data = [
|
189
|
-
|
190
|
-
|
191
|
-
|
187
|
+
metadata[batch_start_offset:batch_end_offset],
|
188
|
+
metadata[batch_start_offset:batch_end_offset],
|
189
|
+
embeddings[batch_start_offset:batch_end_offset],
|
192
190
|
]
|
193
191
|
res = self.col.insert(insert_data)
|
194
192
|
insert_count += len(res.primary_keys)
|
@@ -219,5 +217,4 @@ class Milvus(VectorDB):
|
|
219
217
|
)
|
220
218
|
|
221
219
|
# Organize results.
|
222
|
-
|
223
|
-
return ret
|
220
|
+
return [result.id for result in res[0]]
|
@@ -1,57 +1,63 @@
|
|
1
|
-
import click
|
2
1
|
import os
|
2
|
+
from typing import Annotated, Unpack
|
3
|
+
|
4
|
+
import click
|
3
5
|
from pydantic import SecretStr
|
4
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
8
|
+
|
5
9
|
from ....cli.cli import (
|
6
10
|
CommonTypedDict,
|
7
11
|
cli,
|
8
12
|
click_parameter_decorators_from_typed_dict,
|
9
13
|
run,
|
10
14
|
)
|
11
|
-
from typing import Annotated, Optional, Unpack
|
12
|
-
from vectordb_bench.backend.clients import DB
|
13
15
|
|
14
16
|
|
15
17
|
class PgDiskAnnTypedDict(CommonTypedDict):
|
16
18
|
user_name: Annotated[
|
17
|
-
str,
|
19
|
+
str,
|
20
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
18
21
|
]
|
19
22
|
password: Annotated[
|
20
23
|
str,
|
21
|
-
click.option(
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
click.option(
|
25
|
+
"--password",
|
26
|
+
type=str,
|
27
|
+
help="Postgres database password",
|
28
|
+
default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
|
29
|
+
show_default="$POSTGRES_PASSWORD",
|
30
|
+
),
|
27
31
|
]
|
28
32
|
|
29
|
-
host: Annotated[
|
30
|
-
|
31
|
-
]
|
32
|
-
db_name: Annotated[
|
33
|
-
str, click.option("--db-name", type=str, help="Db name", required=True)
|
34
|
-
]
|
33
|
+
host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
|
34
|
+
db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
|
35
35
|
max_neighbors: Annotated[
|
36
36
|
int,
|
37
37
|
click.option(
|
38
|
-
"--max-neighbors",
|
38
|
+
"--max-neighbors",
|
39
|
+
type=int,
|
40
|
+
help="PgDiskAnn max neighbors",
|
39
41
|
),
|
40
42
|
]
|
41
43
|
l_value_ib: Annotated[
|
42
44
|
int,
|
43
45
|
click.option(
|
44
|
-
"--l-value-ib",
|
46
|
+
"--l-value-ib",
|
47
|
+
type=int,
|
48
|
+
help="PgDiskAnn l_value_ib",
|
45
49
|
),
|
46
50
|
]
|
47
51
|
l_value_is: Annotated[
|
48
52
|
float,
|
49
53
|
click.option(
|
50
|
-
"--l-value-is",
|
54
|
+
"--l-value-is",
|
55
|
+
type=float,
|
56
|
+
help="PgDiskAnn l_value_is",
|
51
57
|
),
|
52
58
|
]
|
53
59
|
maintenance_work_mem: Annotated[
|
54
|
-
|
60
|
+
str | None,
|
55
61
|
click.option(
|
56
62
|
"--maintenance-work-mem",
|
57
63
|
type=str,
|
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
63
69
|
),
|
64
70
|
]
|
65
71
|
max_parallel_workers: Annotated[
|
66
|
-
|
72
|
+
int | None,
|
67
73
|
click.option(
|
68
74
|
"--max-parallel-workers",
|
69
75
|
type=int,
|
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
72
78
|
),
|
73
79
|
]
|
74
80
|
|
81
|
+
|
75
82
|
@cli.command()
|
76
83
|
@click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
|
77
84
|
def PgDiskAnn(
|
@@ -96,4 +103,4 @@ def PgDiskAnn(
|
|
96
103
|
maintenance_work_mem=parameters["maintenance_work_mem"],
|
97
104
|
),
|
98
105
|
**parameters,
|
99
|
-
)
|
106
|
+
)
|