vectordb-bench 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +75 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +111 -70
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +5 -9
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +18 -19
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +94 -58
- vectordb_bench/backend/clients/test/cli.py +1 -2
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +4 -5
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +30 -18
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +51 -23
- vectordb_bench/backend/runner/serial_runner.py +91 -48
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -72
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +56 -26
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.20.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
|
|
1
|
-
from typing import Annotated, TypedDict, Unpack
|
1
|
+
from typing import Annotated, TypedDict, Unpack
|
2
2
|
|
3
3
|
import click
|
4
4
|
from pydantic import SecretStr
|
5
5
|
|
6
|
+
from vectordb_bench.backend.clients import DB
|
6
7
|
from vectordb_bench.cli.cli import (
|
7
8
|
CommonTypedDict,
|
8
9
|
HNSWFlavor3,
|
@@ -10,33 +11,33 @@ from vectordb_bench.cli.cli import (
|
|
10
11
|
cli,
|
11
12
|
click_parameter_decorators_from_typed_dict,
|
12
13
|
run,
|
13
|
-
|
14
14
|
)
|
15
|
-
from vectordb_bench.backend.clients import DB
|
16
15
|
|
17
16
|
DBTYPE = DB.Milvus
|
18
17
|
|
19
18
|
|
20
19
|
class MilvusTypedDict(TypedDict):
|
21
20
|
uri: Annotated[
|
22
|
-
str,
|
21
|
+
str,
|
22
|
+
click.option("--uri", type=str, help="uri connection string", required=True),
|
23
23
|
]
|
24
24
|
user_name: Annotated[
|
25
|
-
|
25
|
+
str | None,
|
26
|
+
click.option("--user-name", type=str, help="Db username", required=False),
|
26
27
|
]
|
27
28
|
password: Annotated[
|
28
|
-
|
29
|
+
str | None,
|
30
|
+
click.option("--password", type=str, help="Db password", required=False),
|
29
31
|
]
|
30
32
|
|
31
33
|
|
32
|
-
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
|
33
|
-
...
|
34
|
+
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
|
34
35
|
|
35
36
|
|
36
37
|
@cli.command()
|
37
38
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
38
39
|
def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
39
|
-
from .config import
|
40
|
+
from .config import AutoIndexConfig, MilvusConfig
|
40
41
|
|
41
42
|
run(
|
42
43
|
db=DBTYPE,
|
@@ -54,7 +55,7 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
|
54
55
|
@cli.command()
|
55
56
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
56
57
|
def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
57
|
-
from .config import
|
58
|
+
from .config import FLATConfig, MilvusConfig
|
58
59
|
|
59
60
|
run(
|
60
61
|
db=DBTYPE,
|
@@ -69,14 +70,13 @@ def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
|
69
70
|
)
|
70
71
|
|
71
72
|
|
72
|
-
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
|
73
|
-
...
|
73
|
+
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
|
74
74
|
|
75
75
|
|
76
76
|
@cli.command()
|
77
77
|
@click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
|
78
78
|
def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
79
|
-
from .config import
|
79
|
+
from .config import HNSWConfig, MilvusConfig
|
80
80
|
|
81
81
|
run(
|
82
82
|
db=DBTYPE,
|
@@ -95,14 +95,13 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
|
95
95
|
)
|
96
96
|
|
97
97
|
|
98
|
-
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
|
99
|
-
...
|
98
|
+
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
|
100
99
|
|
101
100
|
|
102
101
|
@cli.command()
|
103
102
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
104
103
|
def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
105
|
-
from .config import
|
104
|
+
from .config import IVFFlatConfig, MilvusConfig
|
106
105
|
|
107
106
|
run(
|
108
107
|
db=DBTYPE,
|
@@ -123,7 +122,7 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
123
122
|
@cli.command()
|
124
123
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
125
124
|
def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
126
|
-
from .config import
|
125
|
+
from .config import IVFSQ8Config, MilvusConfig
|
127
126
|
|
128
127
|
run(
|
129
128
|
db=DBTYPE,
|
@@ -142,17 +141,13 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
142
141
|
|
143
142
|
|
144
143
|
class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
|
145
|
-
search_list: Annotated[
|
146
|
-
str, click.option("--search-list",
|
147
|
-
type=int,
|
148
|
-
required=True)
|
149
|
-
]
|
144
|
+
search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
|
150
145
|
|
151
146
|
|
152
147
|
@cli.command()
|
153
148
|
@click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
|
154
149
|
def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
155
|
-
from .config import
|
150
|
+
from .config import DISKANNConfig, MilvusConfig
|
156
151
|
|
157
152
|
run(
|
158
153
|
db=DBTYPE,
|
@@ -171,21 +166,16 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
|
171
166
|
|
172
167
|
class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
|
173
168
|
cache_dataset_on_device: Annotated[
|
174
|
-
str,
|
175
|
-
|
176
|
-
required=True)
|
177
|
-
]
|
178
|
-
refine_ratio: Annotated[
|
179
|
-
str, click.option("--refine-ratio",
|
180
|
-
type=float,
|
181
|
-
required=True)
|
169
|
+
str,
|
170
|
+
click.option("--cache-dataset-on-device", type=str, required=True),
|
182
171
|
]
|
172
|
+
refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
|
183
173
|
|
184
174
|
|
185
175
|
@cli.command()
|
186
176
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
|
187
177
|
def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
188
|
-
from .config import
|
178
|
+
from .config import GPUIVFFlatConfig, MilvusConfig
|
189
179
|
|
190
180
|
run(
|
191
181
|
db=DBTYPE,
|
@@ -205,23 +195,20 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
|
205
195
|
)
|
206
196
|
|
207
197
|
|
208
|
-
class MilvusGPUIVFPQTypedDict(
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
type=int,
|
217
|
-
required=True)
|
218
|
-
]
|
198
|
+
class MilvusGPUIVFPQTypedDict(
|
199
|
+
CommonTypedDict,
|
200
|
+
MilvusTypedDict,
|
201
|
+
MilvusIVFFlatTypedDict,
|
202
|
+
MilvusGPUIVFTypedDict,
|
203
|
+
):
|
204
|
+
m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
|
205
|
+
nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
|
219
206
|
|
220
207
|
|
221
208
|
@cli.command()
|
222
209
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
|
223
210
|
def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
224
|
-
from .config import
|
211
|
+
from .config import GPUIVFPQConfig, MilvusConfig
|
225
212
|
|
226
213
|
run(
|
227
214
|
db=DBTYPE,
|
@@ -245,51 +232,22 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
|
245
232
|
|
246
233
|
class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
|
247
234
|
intermediate_graph_degree: Annotated[
|
248
|
-
str,
|
249
|
-
|
250
|
-
required=True)
|
251
|
-
]
|
252
|
-
graph_degree: Annotated[
|
253
|
-
str, click.option("--graph-degree",
|
254
|
-
type=int,
|
255
|
-
required=True)
|
256
|
-
]
|
257
|
-
build_algo: Annotated[
|
258
|
-
str, click.option("--build_algo",
|
259
|
-
type=str,
|
260
|
-
required=True)
|
261
|
-
]
|
262
|
-
team_size: Annotated[
|
263
|
-
str, click.option("--team-size",
|
264
|
-
type=int,
|
265
|
-
required=True)
|
266
|
-
]
|
267
|
-
search_width: Annotated[
|
268
|
-
str, click.option("--search-width",
|
269
|
-
type=int,
|
270
|
-
required=True)
|
271
|
-
]
|
272
|
-
itopk_size: Annotated[
|
273
|
-
str, click.option("--itopk-size",
|
274
|
-
type=int,
|
275
|
-
required=True)
|
276
|
-
]
|
277
|
-
min_iterations: Annotated[
|
278
|
-
str, click.option("--min-iterations",
|
279
|
-
type=int,
|
280
|
-
required=True)
|
281
|
-
]
|
282
|
-
max_iterations: Annotated[
|
283
|
-
str, click.option("--max-iterations",
|
284
|
-
type=int,
|
285
|
-
required=True)
|
235
|
+
str,
|
236
|
+
click.option("--intermediate-graph-degree", type=int, required=True),
|
286
237
|
]
|
238
|
+
graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
|
239
|
+
build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
|
240
|
+
team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
|
241
|
+
search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
|
242
|
+
itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
|
243
|
+
min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
|
244
|
+
max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
|
287
245
|
|
288
246
|
|
289
247
|
@cli.command()
|
290
248
|
@click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
|
291
249
|
def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
|
292
|
-
from .config import
|
250
|
+
from .config import GPUCAGRAConfig, MilvusConfig
|
293
251
|
|
294
252
|
run(
|
295
253
|
db=DBTYPE,
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr, validator
|
2
|
-
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
3
4
|
|
4
5
|
|
5
6
|
class MilvusConfig(DBConfig):
|
@@ -15,10 +16,14 @@ class MilvusConfig(DBConfig):
|
|
15
16
|
}
|
16
17
|
|
17
18
|
@validator("*")
|
18
|
-
def not_empty_field(cls, v, field):
|
19
|
-
if
|
19
|
+
def not_empty_field(cls, v: any, field: any):
|
20
|
+
if (
|
21
|
+
field.name in cls.common_short_configs()
|
22
|
+
or field.name in cls.common_long_configs()
|
23
|
+
or field.name in ["user", "password"]
|
24
|
+
):
|
20
25
|
return v
|
21
|
-
if isinstance(v,
|
26
|
+
if isinstance(v, str | SecretStr) and len(v) == 0:
|
22
27
|
raise ValueError("Empty string!")
|
23
28
|
return v
|
24
29
|
|
@@ -28,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
|
|
28
33
|
|
29
34
|
index: IndexType
|
30
35
|
metric_type: MetricType | None = None
|
31
|
-
|
36
|
+
|
32
37
|
@property
|
33
38
|
def is_gpu_index(self) -> bool:
|
34
|
-
return self.index in [
|
39
|
+
return self.index in [
|
40
|
+
IndexType.GPU_CAGRA,
|
41
|
+
IndexType.GPU_IVF_FLAT,
|
42
|
+
IndexType.GPU_IVF_PQ,
|
43
|
+
]
|
35
44
|
|
36
45
|
def parse_metric(self) -> str:
|
37
46
|
if not self.metric_type:
|
@@ -113,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
|
|
113
122
|
"metric_type": self.parse_metric(),
|
114
123
|
"params": {"nprobe": self.nprobe},
|
115
124
|
}
|
116
|
-
|
125
|
+
|
126
|
+
|
117
127
|
class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
|
118
128
|
nlist: int
|
119
129
|
nprobe: int | None = None
|
@@ -210,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
|
|
210
220
|
search_width: int = 4
|
211
221
|
min_iterations: int = 0
|
212
222
|
max_iterations: int = 0
|
213
|
-
build_algo: str = "IVF_PQ"
|
223
|
+
build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
|
214
224
|
cache_dataset_on_device: str
|
215
225
|
refine_ratio: float | None = None
|
216
226
|
index: IndexType = IndexType.GPU_CAGRA
|
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
+
from collections.abc import Iterable
|
5
6
|
from contextlib import contextmanager
|
6
|
-
from typing import Iterable
|
7
7
|
|
8
|
-
from pymilvus import Collection, utility
|
9
|
-
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
|
8
|
+
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
|
10
9
|
|
11
10
|
from ..api import VectorDB
|
12
11
|
from .config import MilvusIndexConfig
|
13
12
|
|
14
|
-
|
15
13
|
log = logging.getLogger(__name__)
|
16
14
|
|
17
|
-
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
|
15
|
+
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
|
16
|
+
|
18
17
|
|
19
18
|
class Milvus(VectorDB):
|
20
19
|
def __init__(
|
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
|
|
32
31
|
self.db_config = db_config
|
33
32
|
self.case_config = db_case_config
|
34
33
|
self.collection_name = collection_name
|
35
|
-
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
|
34
|
+
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
|
36
35
|
|
37
36
|
self._primary_field = "pk"
|
38
37
|
self._scalar_field = "id"
|
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
|
|
40
39
|
self._index_name = "vector_idx"
|
41
40
|
|
42
41
|
from pymilvus import connections
|
42
|
+
|
43
43
|
connections.connect(**self.db_config, timeout=30)
|
44
44
|
if drop_old and utility.has_collection(self.collection_name):
|
45
45
|
log.info(f"{self.name} client drop_old collection: {self.collection_name}")
|
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
|
|
49
49
|
fields = [
|
50
50
|
FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
|
51
51
|
FieldSchema(self._scalar_field, DataType.INT64),
|
52
|
-
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
|
52
|
+
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
|
53
53
|
]
|
54
54
|
|
55
55
|
log.info(f"{self.name} create collection: {self.collection_name}")
|
@@ -79,6 +79,7 @@ class Milvus(VectorDB):
|
|
79
79
|
>>> self.search_embedding()
|
80
80
|
"""
|
81
81
|
from pymilvus import connections
|
82
|
+
|
82
83
|
self.col: Collection | None = None
|
83
84
|
|
84
85
|
connections.connect(**self.db_config, timeout=60)
|
@@ -108,6 +109,7 @@ class Milvus(VectorDB):
|
|
108
109
|
)
|
109
110
|
|
110
111
|
utility.wait_for_index_building_complete(self.collection_name)
|
112
|
+
|
111
113
|
def wait_index():
|
112
114
|
while True:
|
113
115
|
progress = utility.index_building_progress(self.collection_name)
|
@@ -120,18 +122,17 @@ class Milvus(VectorDB):
|
|
120
122
|
# Skip compaction if use GPU indexType
|
121
123
|
if self.case_config.is_gpu_index:
|
122
124
|
log.debug("skip compaction for gpu index type.")
|
123
|
-
else
|
125
|
+
else:
|
124
126
|
try:
|
125
127
|
self.col.compact()
|
126
128
|
self.col.wait_for_compaction_completed()
|
127
129
|
except Exception as e:
|
128
130
|
log.warning(f"{self.name} compact error: {e}")
|
129
|
-
if hasattr(e,
|
130
|
-
if e.code().name ==
|
131
|
+
if hasattr(e, "code"):
|
132
|
+
if e.code().name == "PERMISSION_DENIED":
|
131
133
|
log.warning("Skip compact due to permission denied.")
|
132
|
-
pass
|
133
134
|
else:
|
134
|
-
raise e
|
135
|
+
raise e from e
|
135
136
|
wait_index()
|
136
137
|
except Exception as e:
|
137
138
|
log.warning(f"{self.name} optimize error: {e}")
|
@@ -156,7 +157,6 @@ class Milvus(VectorDB):
|
|
156
157
|
log.warning(f"{self.name} pre load error: {e}")
|
157
158
|
raise e from None
|
158
159
|
|
159
|
-
|
160
160
|
def optimize(self):
|
161
161
|
assert self.col, "Please call self.init() before"
|
162
162
|
self._optimize()
|
@@ -164,7 +164,7 @@ class Milvus(VectorDB):
|
|
164
164
|
def need_normalize_cosine(self) -> bool:
|
165
165
|
"""Wheather this database need to normalize dataset to support COSINE"""
|
166
166
|
if self.case_config.is_gpu_index:
|
167
|
-
log.info(
|
167
|
+
log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
|
168
168
|
return True
|
169
169
|
|
170
170
|
return False
|
@@ -184,9 +184,9 @@ class Milvus(VectorDB):
|
|
184
184
|
for batch_start_offset in range(0, len(embeddings), self.batch_size):
|
185
185
|
batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
|
186
186
|
insert_data = [
|
187
|
-
|
188
|
-
|
189
|
-
|
187
|
+
metadata[batch_start_offset:batch_end_offset],
|
188
|
+
metadata[batch_start_offset:batch_end_offset],
|
189
|
+
embeddings[batch_start_offset:batch_end_offset],
|
190
190
|
]
|
191
191
|
res = self.col.insert(insert_data)
|
192
192
|
insert_count += len(res.primary_keys)
|
@@ -217,5 +217,4 @@ class Milvus(VectorDB):
|
|
217
217
|
)
|
218
218
|
|
219
219
|
# Organize results.
|
220
|
-
|
221
|
-
return ret
|
220
|
+
return [result.id for result in res[0]]
|
@@ -1,57 +1,63 @@
|
|
1
|
-
import click
|
2
1
|
import os
|
2
|
+
from typing import Annotated, Unpack
|
3
|
+
|
4
|
+
import click
|
3
5
|
from pydantic import SecretStr
|
4
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
8
|
+
|
5
9
|
from ....cli.cli import (
|
6
10
|
CommonTypedDict,
|
7
11
|
cli,
|
8
12
|
click_parameter_decorators_from_typed_dict,
|
9
13
|
run,
|
10
14
|
)
|
11
|
-
from typing import Annotated, Optional, Unpack
|
12
|
-
from vectordb_bench.backend.clients import DB
|
13
15
|
|
14
16
|
|
15
17
|
class PgDiskAnnTypedDict(CommonTypedDict):
|
16
18
|
user_name: Annotated[
|
17
|
-
str,
|
19
|
+
str,
|
20
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
18
21
|
]
|
19
22
|
password: Annotated[
|
20
23
|
str,
|
21
|
-
click.option(
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
click.option(
|
25
|
+
"--password",
|
26
|
+
type=str,
|
27
|
+
help="Postgres database password",
|
28
|
+
default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
|
29
|
+
show_default="$POSTGRES_PASSWORD",
|
30
|
+
),
|
27
31
|
]
|
28
32
|
|
29
|
-
host: Annotated[
|
30
|
-
|
31
|
-
]
|
32
|
-
db_name: Annotated[
|
33
|
-
str, click.option("--db-name", type=str, help="Db name", required=True)
|
34
|
-
]
|
33
|
+
host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
|
34
|
+
db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
|
35
35
|
max_neighbors: Annotated[
|
36
36
|
int,
|
37
37
|
click.option(
|
38
|
-
"--max-neighbors",
|
38
|
+
"--max-neighbors",
|
39
|
+
type=int,
|
40
|
+
help="PgDiskAnn max neighbors",
|
39
41
|
),
|
40
42
|
]
|
41
43
|
l_value_ib: Annotated[
|
42
44
|
int,
|
43
45
|
click.option(
|
44
|
-
"--l-value-ib",
|
46
|
+
"--l-value-ib",
|
47
|
+
type=int,
|
48
|
+
help="PgDiskAnn l_value_ib",
|
45
49
|
),
|
46
50
|
]
|
47
51
|
l_value_is: Annotated[
|
48
52
|
float,
|
49
53
|
click.option(
|
50
|
-
"--l-value-is",
|
54
|
+
"--l-value-is",
|
55
|
+
type=float,
|
56
|
+
help="PgDiskAnn l_value_is",
|
51
57
|
),
|
52
58
|
]
|
53
59
|
maintenance_work_mem: Annotated[
|
54
|
-
|
60
|
+
str | None,
|
55
61
|
click.option(
|
56
62
|
"--maintenance-work-mem",
|
57
63
|
type=str,
|
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
63
69
|
),
|
64
70
|
]
|
65
71
|
max_parallel_workers: Annotated[
|
66
|
-
|
72
|
+
int | None,
|
67
73
|
click.option(
|
68
74
|
"--max-parallel-workers",
|
69
75
|
type=int,
|
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
72
78
|
),
|
73
79
|
]
|
74
80
|
|
81
|
+
|
75
82
|
@cli.command()
|
76
83
|
@click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
|
77
84
|
def PgDiskAnn(
|
@@ -96,4 +103,4 @@ def PgDiskAnn(
|
|
96
103
|
maintenance_work_mem=parameters["maintenance_work_mem"],
|
97
104
|
),
|
98
105
|
**parameters,
|
99
|
-
)
|
106
|
+
)
|
@@ -1,7 +1,9 @@
|
|
1
1
|
from abc import abstractmethod
|
2
|
-
from
|
2
|
+
from collections.abc import Mapping, Sequence
|
3
|
+
from typing import Any, LiteralString, TypedDict
|
4
|
+
|
3
5
|
from pydantic import BaseModel, SecretStr
|
4
|
-
|
6
|
+
|
5
7
|
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
6
8
|
|
7
9
|
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
@@ -9,7 +11,7 @@ POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
|
9
11
|
|
10
12
|
class PgDiskANNConfigDict(TypedDict):
|
11
13
|
"""These keys will be directly used as kwargs in psycopg connection string,
|
12
|
-
|
14
|
+
so the names must match exactly psycopg API"""
|
13
15
|
|
14
16
|
user: str
|
15
17
|
password: str
|
@@ -41,44 +43,43 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
|
|
41
43
|
metric_type: MetricType | None = None
|
42
44
|
create_index_before_load: bool = False
|
43
45
|
create_index_after_load: bool = True
|
44
|
-
maintenance_work_mem:
|
45
|
-
max_parallel_workers:
|
46
|
+
maintenance_work_mem: str | None
|
47
|
+
max_parallel_workers: int | None
|
46
48
|
|
47
49
|
def parse_metric(self) -> str:
|
48
50
|
if self.metric_type == MetricType.L2:
|
49
51
|
return "vector_l2_ops"
|
50
|
-
|
52
|
+
if self.metric_type == MetricType.IP:
|
51
53
|
return "vector_ip_ops"
|
52
54
|
return "vector_cosine_ops"
|
53
55
|
|
54
56
|
def parse_metric_fun_op(self) -> LiteralString:
|
55
57
|
if self.metric_type == MetricType.L2:
|
56
58
|
return "<->"
|
57
|
-
|
59
|
+
if self.metric_type == MetricType.IP:
|
58
60
|
return "<#>"
|
59
61
|
return "<=>"
|
60
62
|
|
61
63
|
def parse_metric_fun_str(self) -> str:
|
62
64
|
if self.metric_type == MetricType.L2:
|
63
65
|
return "l2_distance"
|
64
|
-
|
66
|
+
if self.metric_type == MetricType.IP:
|
65
67
|
return "max_inner_product"
|
66
68
|
return "cosine_distance"
|
67
|
-
|
69
|
+
|
68
70
|
@abstractmethod
|
69
|
-
def index_param(self) -> dict:
|
70
|
-
...
|
71
|
+
def index_param(self) -> dict: ...
|
71
72
|
|
72
73
|
@abstractmethod
|
73
|
-
def search_param(self) -> dict:
|
74
|
-
...
|
74
|
+
def search_param(self) -> dict: ...
|
75
75
|
|
76
76
|
@abstractmethod
|
77
|
-
def session_param(self) -> dict:
|
78
|
-
...
|
77
|
+
def session_param(self) -> dict: ...
|
79
78
|
|
80
79
|
@staticmethod
|
81
|
-
def _optionally_build_with_options(
|
80
|
+
def _optionally_build_with_options(
|
81
|
+
with_options: Mapping[str, Any],
|
82
|
+
) -> Sequence[dict[str, Any]]:
|
82
83
|
"""Walk through mappings, creating a List of {key1 = value} pairs. That will be used to build a where clause"""
|
83
84
|
options = []
|
84
85
|
for option_name, value in with_options.items():
|
@@ -87,35 +88,36 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
|
|
87
88
|
{
|
88
89
|
"option_name": option_name,
|
89
90
|
"val": str(value),
|
90
|
-
}
|
91
|
+
},
|
91
92
|
)
|
92
93
|
return options
|
93
94
|
|
94
95
|
@staticmethod
|
95
96
|
def _optionally_build_set_options(
|
96
|
-
set_mapping: Mapping[str, Any]
|
97
|
+
set_mapping: Mapping[str, Any],
|
97
98
|
) -> Sequence[dict[str, Any]]:
|
98
99
|
"""Walk through options, creating 'SET 'key1 = "value1";' list"""
|
99
100
|
session_options = []
|
100
101
|
for setting_name, value in set_mapping.items():
|
101
102
|
if value:
|
102
103
|
session_options.append(
|
103
|
-
{
|
104
|
+
{
|
105
|
+
"parameter": {
|
104
106
|
"setting_name": setting_name,
|
105
107
|
"val": str(value),
|
106
108
|
},
|
107
|
-
}
|
109
|
+
},
|
108
110
|
)
|
109
111
|
return session_options
|
110
|
-
|
112
|
+
|
111
113
|
|
112
114
|
class PgDiskANNImplConfig(PgDiskANNIndexConfig):
|
113
115
|
index: IndexType = IndexType.DISKANN
|
114
116
|
max_neighbors: int | None
|
115
117
|
l_value_ib: int | None
|
116
118
|
l_value_is: float | None
|
117
|
-
maintenance_work_mem:
|
118
|
-
max_parallel_workers:
|
119
|
+
maintenance_work_mem: str | None = None
|
120
|
+
max_parallel_workers: int | None = None
|
119
121
|
|
120
122
|
def index_param(self) -> dict:
|
121
123
|
return {
|
@@ -128,18 +130,19 @@ class PgDiskANNImplConfig(PgDiskANNIndexConfig):
|
|
128
130
|
"maintenance_work_mem": self.maintenance_work_mem,
|
129
131
|
"max_parallel_workers": self.max_parallel_workers,
|
130
132
|
}
|
131
|
-
|
133
|
+
|
132
134
|
def search_param(self) -> dict:
|
133
135
|
return {
|
134
136
|
"metric": self.parse_metric(),
|
135
137
|
"metric_fun_op": self.parse_metric_fun_op(),
|
136
138
|
}
|
137
|
-
|
139
|
+
|
138
140
|
def session_param(self) -> dict:
|
139
141
|
return {
|
140
142
|
"diskann.l_value_is": self.l_value_is,
|
141
143
|
}
|
142
|
-
|
144
|
+
|
145
|
+
|
143
146
|
_pgdiskann_case_config = {
|
144
147
|
IndexType.DISKANN: PgDiskANNImplConfig,
|
145
148
|
}
|