vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
|
|
1
|
-
from typing import Annotated, TypedDict, Unpack
|
1
|
+
from typing import Annotated, TypedDict, Unpack
|
2
2
|
|
3
3
|
import click
|
4
4
|
from pydantic import SecretStr
|
5
5
|
|
6
|
+
from vectordb_bench.backend.clients import DB
|
6
7
|
from vectordb_bench.cli.cli import (
|
7
8
|
CommonTypedDict,
|
8
9
|
HNSWFlavor3,
|
@@ -10,33 +11,33 @@ from vectordb_bench.cli.cli import (
|
|
10
11
|
cli,
|
11
12
|
click_parameter_decorators_from_typed_dict,
|
12
13
|
run,
|
13
|
-
|
14
14
|
)
|
15
|
-
from vectordb_bench.backend.clients import DB
|
16
15
|
|
17
16
|
DBTYPE = DB.Milvus
|
18
17
|
|
19
18
|
|
20
19
|
class MilvusTypedDict(TypedDict):
|
21
20
|
uri: Annotated[
|
22
|
-
str,
|
21
|
+
str,
|
22
|
+
click.option("--uri", type=str, help="uri connection string", required=True),
|
23
23
|
]
|
24
24
|
user_name: Annotated[
|
25
|
-
|
25
|
+
str | None,
|
26
|
+
click.option("--user-name", type=str, help="Db username", required=False),
|
26
27
|
]
|
27
28
|
password: Annotated[
|
28
|
-
|
29
|
+
str | None,
|
30
|
+
click.option("--password", type=str, help="Db password", required=False),
|
29
31
|
]
|
30
32
|
|
31
33
|
|
32
|
-
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
|
33
|
-
...
|
34
|
+
class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
|
34
35
|
|
35
36
|
|
36
37
|
@cli.command()
|
37
38
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
38
39
|
def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
39
|
-
from .config import
|
40
|
+
from .config import AutoIndexConfig, MilvusConfig
|
40
41
|
|
41
42
|
run(
|
42
43
|
db=DBTYPE,
|
@@ -54,7 +55,7 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
|
54
55
|
@cli.command()
|
55
56
|
@click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
|
56
57
|
def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
57
|
-
from .config import
|
58
|
+
from .config import FLATConfig, MilvusConfig
|
58
59
|
|
59
60
|
run(
|
60
61
|
db=DBTYPE,
|
@@ -69,14 +70,13 @@ def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
|
|
69
70
|
)
|
70
71
|
|
71
72
|
|
72
|
-
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
|
73
|
-
...
|
73
|
+
class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
|
74
74
|
|
75
75
|
|
76
76
|
@cli.command()
|
77
77
|
@click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
|
78
78
|
def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
79
|
-
from .config import
|
79
|
+
from .config import HNSWConfig, MilvusConfig
|
80
80
|
|
81
81
|
run(
|
82
82
|
db=DBTYPE,
|
@@ -95,14 +95,13 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
|
|
95
95
|
)
|
96
96
|
|
97
97
|
|
98
|
-
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
|
99
|
-
...
|
98
|
+
class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
|
100
99
|
|
101
100
|
|
102
101
|
@cli.command()
|
103
102
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
104
103
|
def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
105
|
-
from .config import
|
104
|
+
from .config import IVFFlatConfig, MilvusConfig
|
106
105
|
|
107
106
|
run(
|
108
107
|
db=DBTYPE,
|
@@ -123,7 +122,7 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
123
122
|
@cli.command()
|
124
123
|
@click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
|
125
124
|
def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
126
|
-
from .config import
|
125
|
+
from .config import IVFSQ8Config, MilvusConfig
|
127
126
|
|
128
127
|
run(
|
129
128
|
db=DBTYPE,
|
@@ -142,17 +141,13 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
|
|
142
141
|
|
143
142
|
|
144
143
|
class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
|
145
|
-
search_list: Annotated[
|
146
|
-
str, click.option("--search-list",
|
147
|
-
type=int,
|
148
|
-
required=True)
|
149
|
-
]
|
144
|
+
search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
|
150
145
|
|
151
146
|
|
152
147
|
@cli.command()
|
153
148
|
@click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
|
154
149
|
def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
155
|
-
from .config import
|
150
|
+
from .config import DISKANNConfig, MilvusConfig
|
156
151
|
|
157
152
|
run(
|
158
153
|
db=DBTYPE,
|
@@ -171,21 +166,16 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
|
|
171
166
|
|
172
167
|
class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
|
173
168
|
cache_dataset_on_device: Annotated[
|
174
|
-
str,
|
175
|
-
|
176
|
-
required=True)
|
177
|
-
]
|
178
|
-
refine_ratio: Annotated[
|
179
|
-
str, click.option("--refine-ratio",
|
180
|
-
type=float,
|
181
|
-
required=True)
|
169
|
+
str,
|
170
|
+
click.option("--cache-dataset-on-device", type=str, required=True),
|
182
171
|
]
|
172
|
+
refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
|
183
173
|
|
184
174
|
|
185
175
|
@cli.command()
|
186
176
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
|
187
177
|
def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
188
|
-
from .config import
|
178
|
+
from .config import GPUIVFFlatConfig, MilvusConfig
|
189
179
|
|
190
180
|
run(
|
191
181
|
db=DBTYPE,
|
@@ -205,23 +195,20 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
|
|
205
195
|
)
|
206
196
|
|
207
197
|
|
208
|
-
class MilvusGPUIVFPQTypedDict(
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
type=int,
|
217
|
-
required=True)
|
218
|
-
]
|
198
|
+
class MilvusGPUIVFPQTypedDict(
|
199
|
+
CommonTypedDict,
|
200
|
+
MilvusTypedDict,
|
201
|
+
MilvusIVFFlatTypedDict,
|
202
|
+
MilvusGPUIVFTypedDict,
|
203
|
+
):
|
204
|
+
m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
|
205
|
+
nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
|
219
206
|
|
220
207
|
|
221
208
|
@cli.command()
|
222
209
|
@click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
|
223
210
|
def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
224
|
-
from .config import
|
211
|
+
from .config import GPUIVFPQConfig, MilvusConfig
|
225
212
|
|
226
213
|
run(
|
227
214
|
db=DBTYPE,
|
@@ -245,51 +232,22 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
|
|
245
232
|
|
246
233
|
class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
|
247
234
|
intermediate_graph_degree: Annotated[
|
248
|
-
str,
|
249
|
-
|
250
|
-
required=True)
|
251
|
-
]
|
252
|
-
graph_degree: Annotated[
|
253
|
-
str, click.option("--graph-degree",
|
254
|
-
type=int,
|
255
|
-
required=True)
|
256
|
-
]
|
257
|
-
build_algo: Annotated[
|
258
|
-
str, click.option("--build_algo",
|
259
|
-
type=str,
|
260
|
-
required=True)
|
261
|
-
]
|
262
|
-
team_size: Annotated[
|
263
|
-
str, click.option("--team-size",
|
264
|
-
type=int,
|
265
|
-
required=True)
|
266
|
-
]
|
267
|
-
search_width: Annotated[
|
268
|
-
str, click.option("--search-width",
|
269
|
-
type=int,
|
270
|
-
required=True)
|
271
|
-
]
|
272
|
-
itopk_size: Annotated[
|
273
|
-
str, click.option("--itopk-size",
|
274
|
-
type=int,
|
275
|
-
required=True)
|
276
|
-
]
|
277
|
-
min_iterations: Annotated[
|
278
|
-
str, click.option("--min-iterations",
|
279
|
-
type=int,
|
280
|
-
required=True)
|
281
|
-
]
|
282
|
-
max_iterations: Annotated[
|
283
|
-
str, click.option("--max-iterations",
|
284
|
-
type=int,
|
285
|
-
required=True)
|
235
|
+
str,
|
236
|
+
click.option("--intermediate-graph-degree", type=int, required=True),
|
286
237
|
]
|
238
|
+
graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
|
239
|
+
build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
|
240
|
+
team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
|
241
|
+
search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
|
242
|
+
itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
|
243
|
+
min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
|
244
|
+
max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
|
287
245
|
|
288
246
|
|
289
247
|
@cli.command()
|
290
248
|
@click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
|
291
249
|
def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
|
292
|
-
from .config import
|
250
|
+
from .config import GPUCAGRAConfig, MilvusConfig
|
293
251
|
|
294
252
|
run(
|
295
253
|
db=DBTYPE,
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from pydantic import BaseModel, SecretStr, validator
|
2
|
-
|
2
|
+
|
3
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
3
4
|
|
4
5
|
|
5
6
|
class MilvusConfig(DBConfig):
|
@@ -15,10 +16,14 @@ class MilvusConfig(DBConfig):
|
|
15
16
|
}
|
16
17
|
|
17
18
|
@validator("*")
|
18
|
-
def not_empty_field(cls, v, field):
|
19
|
-
if
|
19
|
+
def not_empty_field(cls, v: any, field: any):
|
20
|
+
if (
|
21
|
+
field.name in cls.common_short_configs()
|
22
|
+
or field.name in cls.common_long_configs()
|
23
|
+
or field.name in ["user", "password"]
|
24
|
+
):
|
20
25
|
return v
|
21
|
-
if isinstance(v,
|
26
|
+
if isinstance(v, str | SecretStr) and len(v) == 0:
|
22
27
|
raise ValueError("Empty string!")
|
23
28
|
return v
|
24
29
|
|
@@ -28,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
|
|
28
33
|
|
29
34
|
index: IndexType
|
30
35
|
metric_type: MetricType | None = None
|
31
|
-
|
36
|
+
|
32
37
|
@property
|
33
38
|
def is_gpu_index(self) -> bool:
|
34
|
-
return self.index in [
|
39
|
+
return self.index in [
|
40
|
+
IndexType.GPU_CAGRA,
|
41
|
+
IndexType.GPU_IVF_FLAT,
|
42
|
+
IndexType.GPU_IVF_PQ,
|
43
|
+
]
|
35
44
|
|
36
45
|
def parse_metric(self) -> str:
|
37
46
|
if not self.metric_type:
|
@@ -113,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
|
|
113
122
|
"metric_type": self.parse_metric(),
|
114
123
|
"params": {"nprobe": self.nprobe},
|
115
124
|
}
|
116
|
-
|
125
|
+
|
126
|
+
|
117
127
|
class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
|
118
128
|
nlist: int
|
119
129
|
nprobe: int | None = None
|
@@ -210,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
|
|
210
220
|
search_width: int = 4
|
211
221
|
min_iterations: int = 0
|
212
222
|
max_iterations: int = 0
|
213
|
-
build_algo: str = "IVF_PQ"
|
223
|
+
build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
|
214
224
|
cache_dataset_on_device: str
|
215
225
|
refine_ratio: float | None = None
|
216
226
|
index: IndexType = IndexType.GPU_CAGRA
|
@@ -2,19 +2,18 @@
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
+
from collections.abc import Iterable
|
5
6
|
from contextlib import contextmanager
|
6
|
-
from typing import Iterable
|
7
7
|
|
8
|
-
from pymilvus import Collection, utility
|
9
|
-
from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
|
8
|
+
from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
|
10
9
|
|
11
10
|
from ..api import VectorDB
|
12
11
|
from .config import MilvusIndexConfig
|
13
12
|
|
14
|
-
|
15
13
|
log = logging.getLogger(__name__)
|
16
14
|
|
17
|
-
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
|
15
|
+
MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
|
16
|
+
|
18
17
|
|
19
18
|
class Milvus(VectorDB):
|
20
19
|
def __init__(
|
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
|
|
32
31
|
self.db_config = db_config
|
33
32
|
self.case_config = db_case_config
|
34
33
|
self.collection_name = collection_name
|
35
|
-
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
|
34
|
+
self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
|
36
35
|
|
37
36
|
self._primary_field = "pk"
|
38
37
|
self._scalar_field = "id"
|
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
|
|
40
39
|
self._index_name = "vector_idx"
|
41
40
|
|
42
41
|
from pymilvus import connections
|
42
|
+
|
43
43
|
connections.connect(**self.db_config, timeout=30)
|
44
44
|
if drop_old and utility.has_collection(self.collection_name):
|
45
45
|
log.info(f"{self.name} client drop_old collection: {self.collection_name}")
|
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
|
|
49
49
|
fields = [
|
50
50
|
FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
|
51
51
|
FieldSchema(self._scalar_field, DataType.INT64),
|
52
|
-
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
|
52
|
+
FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
|
53
53
|
]
|
54
54
|
|
55
55
|
log.info(f"{self.name} create collection: {self.collection_name}")
|
@@ -79,6 +79,7 @@ class Milvus(VectorDB):
|
|
79
79
|
>>> self.search_embedding()
|
80
80
|
"""
|
81
81
|
from pymilvus import connections
|
82
|
+
|
82
83
|
self.col: Collection | None = None
|
83
84
|
|
84
85
|
connections.connect(**self.db_config, timeout=60)
|
@@ -108,6 +109,7 @@ class Milvus(VectorDB):
|
|
108
109
|
)
|
109
110
|
|
110
111
|
utility.wait_for_index_building_complete(self.collection_name)
|
112
|
+
|
111
113
|
def wait_index():
|
112
114
|
while True:
|
113
115
|
progress = utility.index_building_progress(self.collection_name)
|
@@ -120,51 +122,30 @@ class Milvus(VectorDB):
|
|
120
122
|
# Skip compaction if use GPU indexType
|
121
123
|
if self.case_config.is_gpu_index:
|
122
124
|
log.debug("skip compaction for gpu index type.")
|
123
|
-
else
|
125
|
+
else:
|
124
126
|
try:
|
125
127
|
self.col.compact()
|
126
128
|
self.col.wait_for_compaction_completed()
|
127
129
|
except Exception as e:
|
128
130
|
log.warning(f"{self.name} compact error: {e}")
|
129
|
-
if hasattr(e,
|
130
|
-
if e.code().name ==
|
131
|
+
if hasattr(e, "code"):
|
132
|
+
if e.code().name == "PERMISSION_DENIED":
|
131
133
|
log.warning("Skip compact due to permission denied.")
|
132
|
-
pass
|
133
134
|
else:
|
134
|
-
raise e
|
135
|
+
raise e from e
|
135
136
|
wait_index()
|
136
137
|
except Exception as e:
|
137
138
|
log.warning(f"{self.name} optimize error: {e}")
|
138
139
|
raise e from None
|
139
140
|
|
140
|
-
def
|
141
|
-
assert self.col, "Please call self.init() before"
|
142
|
-
self._pre_load(self.col)
|
143
|
-
|
144
|
-
def _pre_load(self, coll: Collection):
|
145
|
-
try:
|
146
|
-
if not coll.has_index(index_name=self._index_name):
|
147
|
-
log.info(f"{self.name} create index")
|
148
|
-
coll.create_index(
|
149
|
-
self._vector_field,
|
150
|
-
self.case_config.index_param(),
|
151
|
-
index_name=self._index_name,
|
152
|
-
)
|
153
|
-
coll.load()
|
154
|
-
log.info(f"{self.name} load")
|
155
|
-
except Exception as e:
|
156
|
-
log.warning(f"{self.name} pre load error: {e}")
|
157
|
-
raise e from None
|
158
|
-
|
159
|
-
|
160
|
-
def optimize(self):
|
141
|
+
def optimize(self, data_size: int | None = None):
|
161
142
|
assert self.col, "Please call self.init() before"
|
162
143
|
self._optimize()
|
163
144
|
|
164
145
|
def need_normalize_cosine(self) -> bool:
|
165
146
|
"""Wheather this database need to normalize dataset to support COSINE"""
|
166
147
|
if self.case_config.is_gpu_index:
|
167
|
-
log.info(
|
148
|
+
log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
|
168
149
|
return True
|
169
150
|
|
170
151
|
return False
|
@@ -184,9 +165,9 @@ class Milvus(VectorDB):
|
|
184
165
|
for batch_start_offset in range(0, len(embeddings), self.batch_size):
|
185
166
|
batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
|
186
167
|
insert_data = [
|
187
|
-
|
188
|
-
|
189
|
-
|
168
|
+
metadata[batch_start_offset:batch_end_offset],
|
169
|
+
metadata[batch_start_offset:batch_end_offset],
|
170
|
+
embeddings[batch_start_offset:batch_end_offset],
|
190
171
|
]
|
191
172
|
res = self.col.insert(insert_data)
|
192
173
|
insert_count += len(res.primary_keys)
|
@@ -217,5 +198,4 @@ class Milvus(VectorDB):
|
|
217
198
|
)
|
218
199
|
|
219
200
|
# Organize results.
|
220
|
-
|
221
|
-
return ret
|
201
|
+
return [result.id for result in res[0]]
|
@@ -1,57 +1,63 @@
|
|
1
|
-
import click
|
2
1
|
import os
|
2
|
+
from typing import Annotated, Unpack
|
3
|
+
|
4
|
+
import click
|
3
5
|
from pydantic import SecretStr
|
4
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
8
|
+
|
5
9
|
from ....cli.cli import (
|
6
10
|
CommonTypedDict,
|
7
11
|
cli,
|
8
12
|
click_parameter_decorators_from_typed_dict,
|
9
13
|
run,
|
10
14
|
)
|
11
|
-
from typing import Annotated, Optional, Unpack
|
12
|
-
from vectordb_bench.backend.clients import DB
|
13
15
|
|
14
16
|
|
15
17
|
class PgDiskAnnTypedDict(CommonTypedDict):
|
16
18
|
user_name: Annotated[
|
17
|
-
str,
|
19
|
+
str,
|
20
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
18
21
|
]
|
19
22
|
password: Annotated[
|
20
23
|
str,
|
21
|
-
click.option(
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
24
|
+
click.option(
|
25
|
+
"--password",
|
26
|
+
type=str,
|
27
|
+
help="Postgres database password",
|
28
|
+
default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
|
29
|
+
show_default="$POSTGRES_PASSWORD",
|
30
|
+
),
|
27
31
|
]
|
28
32
|
|
29
|
-
host: Annotated[
|
30
|
-
|
31
|
-
]
|
32
|
-
db_name: Annotated[
|
33
|
-
str, click.option("--db-name", type=str, help="Db name", required=True)
|
34
|
-
]
|
33
|
+
host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
|
34
|
+
db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
|
35
35
|
max_neighbors: Annotated[
|
36
36
|
int,
|
37
37
|
click.option(
|
38
|
-
"--max-neighbors",
|
38
|
+
"--max-neighbors",
|
39
|
+
type=int,
|
40
|
+
help="PgDiskAnn max neighbors",
|
39
41
|
),
|
40
42
|
]
|
41
43
|
l_value_ib: Annotated[
|
42
44
|
int,
|
43
45
|
click.option(
|
44
|
-
"--l-value-ib",
|
46
|
+
"--l-value-ib",
|
47
|
+
type=int,
|
48
|
+
help="PgDiskAnn l_value_ib",
|
45
49
|
),
|
46
50
|
]
|
47
51
|
l_value_is: Annotated[
|
48
52
|
float,
|
49
53
|
click.option(
|
50
|
-
"--l-value-is",
|
54
|
+
"--l-value-is",
|
55
|
+
type=float,
|
56
|
+
help="PgDiskAnn l_value_is",
|
51
57
|
),
|
52
58
|
]
|
53
59
|
maintenance_work_mem: Annotated[
|
54
|
-
|
60
|
+
str | None,
|
55
61
|
click.option(
|
56
62
|
"--maintenance-work-mem",
|
57
63
|
type=str,
|
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
63
69
|
),
|
64
70
|
]
|
65
71
|
max_parallel_workers: Annotated[
|
66
|
-
|
72
|
+
int | None,
|
67
73
|
click.option(
|
68
74
|
"--max-parallel-workers",
|
69
75
|
type=int,
|
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
|
|
72
78
|
),
|
73
79
|
]
|
74
80
|
|
81
|
+
|
75
82
|
@cli.command()
|
76
83
|
@click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
|
77
84
|
def PgDiskAnn(
|
@@ -96,4 +103,4 @@ def PgDiskAnn(
|
|
96
103
|
maintenance_work_mem=parameters["maintenance_work_mem"],
|
97
104
|
),
|
98
105
|
**parameters,
|
99
|
-
)
|
106
|
+
)
|