vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +56 -46
  5. vectordb_bench/backend/clients/__init__.py +101 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +52 -35
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +8 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +62 -80
  26. vectordb_bench/backend/clients/milvus/config.py +31 -7
  27. vectordb_bench/backend/clients/milvus/milvus.py +23 -26
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +51 -23
  62. vectordb_bench/backend/runner/read_write_runner.py +140 -46
  63. vectordb_bench/backend/runner/serial_runner.py +99 -50
  64. vectordb_bench/backend/runner/util.py +4 -19
  65. vectordb_bench/backend/task_runner.py +95 -74
  66. vectordb_bench/backend/utils.py +17 -9
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.18.dist-info/RECORD +0 -131
  103. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,7 @@ from typing import Annotated, TypedDict, Unpack
3
3
  import click
4
4
  from pydantic import SecretStr
5
5
 
6
+ from vectordb_bench.backend.clients import DB
6
7
  from vectordb_bench.cli.cli import (
7
8
  CommonTypedDict,
8
9
  HNSWFlavor3,
@@ -10,33 +11,41 @@ from vectordb_bench.cli.cli import (
10
11
  cli,
11
12
  click_parameter_decorators_from_typed_dict,
12
13
  run,
13
-
14
14
  )
15
- from vectordb_bench.backend.clients import DB
16
15
 
17
16
  DBTYPE = DB.Milvus
18
17
 
19
18
 
20
19
  class MilvusTypedDict(TypedDict):
21
20
  uri: Annotated[
22
- str, click.option("--uri", type=str, help="uri connection string", required=True)
21
+ str,
22
+ click.option("--uri", type=str, help="uri connection string", required=True),
23
+ ]
24
+ user_name: Annotated[
25
+ str | None,
26
+ click.option("--user-name", type=str, help="Db username", required=False),
27
+ ]
28
+ password: Annotated[
29
+ str | None,
30
+ click.option("--password", type=str, help="Db password", required=False),
23
31
  ]
24
32
 
25
33
 
26
- class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
27
- ...
34
+ class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
28
35
 
29
36
 
30
37
  @cli.command()
31
38
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
32
39
  def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
33
- from .config import MilvusConfig, AutoIndexConfig
40
+ from .config import AutoIndexConfig, MilvusConfig
34
41
 
35
42
  run(
36
43
  db=DBTYPE,
37
44
  db_config=MilvusConfig(
38
45
  db_label=parameters["db_label"],
39
46
  uri=SecretStr(parameters["uri"]),
47
+ user=parameters["user_name"],
48
+ password=SecretStr(parameters["password"]),
40
49
  ),
41
50
  db_case_config=AutoIndexConfig(),
42
51
  **parameters,
@@ -46,33 +55,36 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
46
55
  @cli.command()
47
56
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
48
57
  def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
49
- from .config import MilvusConfig, FLATConfig
58
+ from .config import FLATConfig, MilvusConfig
50
59
 
51
60
  run(
52
61
  db=DBTYPE,
53
62
  db_config=MilvusConfig(
54
63
  db_label=parameters["db_label"],
55
64
  uri=SecretStr(parameters["uri"]),
65
+ user=parameters["user_name"],
66
+ password=SecretStr(parameters["password"]),
56
67
  ),
57
68
  db_case_config=FLATConfig(),
58
69
  **parameters,
59
70
  )
60
71
 
61
72
 
62
- class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
63
- ...
73
+ class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
64
74
 
65
75
 
66
76
  @cli.command()
67
77
  @click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
68
78
  def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
69
- from .config import MilvusConfig, HNSWConfig
79
+ from .config import HNSWConfig, MilvusConfig
70
80
 
71
81
  run(
72
82
  db=DBTYPE,
73
83
  db_config=MilvusConfig(
74
84
  db_label=parameters["db_label"],
75
85
  uri=SecretStr(parameters["uri"]),
86
+ user=parameters["user_name"],
87
+ password=SecretStr(parameters["password"]) if parameters["password"] else None,
76
88
  ),
77
89
  db_case_config=HNSWConfig(
78
90
  M=parameters["m"],
@@ -83,20 +95,21 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
83
95
  )
84
96
 
85
97
 
86
- class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
87
- ...
98
+ class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
88
99
 
89
100
 
90
101
  @cli.command()
91
102
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
92
103
  def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
93
- from .config import MilvusConfig, IVFFlatConfig
104
+ from .config import IVFFlatConfig, MilvusConfig
94
105
 
95
106
  run(
96
107
  db=DBTYPE,
97
108
  db_config=MilvusConfig(
98
109
  db_label=parameters["db_label"],
99
110
  uri=SecretStr(parameters["uri"]),
111
+ user=parameters["user_name"],
112
+ password=SecretStr(parameters["password"]),
100
113
  ),
101
114
  db_case_config=IVFFlatConfig(
102
115
  nlist=parameters["nlist"],
@@ -109,13 +122,15 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
109
122
  @cli.command()
110
123
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
111
124
  def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
112
- from .config import MilvusConfig, IVFSQ8Config
125
+ from .config import IVFSQ8Config, MilvusConfig
113
126
 
114
127
  run(
115
128
  db=DBTYPE,
116
129
  db_config=MilvusConfig(
117
130
  db_label=parameters["db_label"],
118
131
  uri=SecretStr(parameters["uri"]),
132
+ user=parameters["user_name"],
133
+ password=SecretStr(parameters["password"]),
119
134
  ),
120
135
  db_case_config=IVFSQ8Config(
121
136
  nlist=parameters["nlist"],
@@ -126,23 +141,21 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
126
141
 
127
142
 
128
143
  class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
129
- search_list: Annotated[
130
- str, click.option("--search-list",
131
- type=int,
132
- required=True)
133
- ]
144
+ search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
134
145
 
135
146
 
136
147
  @cli.command()
137
148
  @click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
138
149
  def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
139
- from .config import MilvusConfig, DISKANNConfig
150
+ from .config import DISKANNConfig, MilvusConfig
140
151
 
141
152
  run(
142
153
  db=DBTYPE,
143
154
  db_config=MilvusConfig(
144
155
  db_label=parameters["db_label"],
145
156
  uri=SecretStr(parameters["uri"]),
157
+ user=parameters["user_name"],
158
+ password=SecretStr(parameters["password"]),
146
159
  ),
147
160
  db_case_config=DISKANNConfig(
148
161
  search_list=parameters["search_list"],
@@ -153,27 +166,24 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
153
166
 
154
167
  class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
155
168
  cache_dataset_on_device: Annotated[
156
- str, click.option("--cache-dataset-on-device",
157
- type=str,
158
- required=True)
159
- ]
160
- refine_ratio: Annotated[
161
- str, click.option("--refine-ratio",
162
- type=float,
163
- required=True)
169
+ str,
170
+ click.option("--cache-dataset-on-device", type=str, required=True),
164
171
  ]
172
+ refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
165
173
 
166
174
 
167
175
  @cli.command()
168
176
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
169
177
  def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
170
- from .config import MilvusConfig, GPUIVFFlatConfig
178
+ from .config import GPUIVFFlatConfig, MilvusConfig
171
179
 
172
180
  run(
173
181
  db=DBTYPE,
174
182
  db_config=MilvusConfig(
175
183
  db_label=parameters["db_label"],
176
184
  uri=SecretStr(parameters["uri"]),
185
+ user=parameters["user_name"],
186
+ password=SecretStr(parameters["password"]),
177
187
  ),
178
188
  db_case_config=GPUIVFFlatConfig(
179
189
  nlist=parameters["nlist"],
@@ -185,29 +195,28 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
185
195
  )
186
196
 
187
197
 
188
- class MilvusGPUIVFPQTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict, MilvusGPUIVFTypedDict):
189
- m: Annotated[
190
- str, click.option("--m",
191
- type=int, help="hnsw m",
192
- required=True)
193
- ]
194
- nbits: Annotated[
195
- str, click.option("--nbits",
196
- type=int,
197
- required=True)
198
- ]
198
+ class MilvusGPUIVFPQTypedDict(
199
+ CommonTypedDict,
200
+ MilvusTypedDict,
201
+ MilvusIVFFlatTypedDict,
202
+ MilvusGPUIVFTypedDict,
203
+ ):
204
+ m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
205
+ nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
199
206
 
200
207
 
201
208
  @cli.command()
202
209
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
203
210
  def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
204
- from .config import MilvusConfig, GPUIVFPQConfig
211
+ from .config import GPUIVFPQConfig, MilvusConfig
205
212
 
206
213
  run(
207
214
  db=DBTYPE,
208
215
  db_config=MilvusConfig(
209
216
  db_label=parameters["db_label"],
210
217
  uri=SecretStr(parameters["uri"]),
218
+ user=parameters["user_name"],
219
+ password=SecretStr(parameters["password"]),
211
220
  ),
212
221
  db_case_config=GPUIVFPQConfig(
213
222
  nlist=parameters["nlist"],
@@ -223,57 +232,30 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
223
232
 
224
233
  class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
225
234
  intermediate_graph_degree: Annotated[
226
- str, click.option("--intermediate-graph-degree",
227
- type=int,
228
- required=True)
229
- ]
230
- graph_degree: Annotated[
231
- str, click.option("--graph-degree",
232
- type=int,
233
- required=True)
234
- ]
235
- build_algo: Annotated[
236
- str, click.option("--build_algo",
237
- type=str,
238
- required=True)
239
- ]
240
- team_size: Annotated[
241
- str, click.option("--team-size",
242
- type=int,
243
- required=True)
244
- ]
245
- search_width: Annotated[
246
- str, click.option("--search-width",
247
- type=int,
248
- required=True)
249
- ]
250
- itopk_size: Annotated[
251
- str, click.option("--itopk-size",
252
- type=int,
253
- required=True)
254
- ]
255
- min_iterations: Annotated[
256
- str, click.option("--min-iterations",
257
- type=int,
258
- required=True)
259
- ]
260
- max_iterations: Annotated[
261
- str, click.option("--max-iterations",
262
- type=int,
263
- required=True)
235
+ str,
236
+ click.option("--intermediate-graph-degree", type=int, required=True),
264
237
  ]
238
+ graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
239
+ build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
240
+ team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
241
+ search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
242
+ itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
243
+ min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
244
+ max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
265
245
 
266
246
 
267
247
  @cli.command()
268
248
  @click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
269
249
  def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
270
- from .config import MilvusConfig, GPUCAGRAConfig
250
+ from .config import GPUCAGRAConfig, MilvusConfig
271
251
 
272
252
  run(
273
253
  db=DBTYPE,
274
254
  db_config=MilvusConfig(
275
255
  db_label=parameters["db_label"],
276
256
  uri=SecretStr(parameters["uri"]),
257
+ user=parameters["user_name"],
258
+ password=SecretStr(parameters["password"]),
277
259
  ),
278
260
  db_case_config=GPUCAGRAConfig(
279
261
  intermediate_graph_degree=parameters["intermediate_graph_degree"],
@@ -1,12 +1,31 @@
1
- from pydantic import BaseModel, SecretStr
2
- from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
1
+ from pydantic import BaseModel, SecretStr, validator
2
+
3
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
3
4
 
4
5
 
5
6
  class MilvusConfig(DBConfig):
6
7
  uri: SecretStr = "http://localhost:19530"
8
+ user: str | None = None
9
+ password: SecretStr | None = None
7
10
 
8
11
  def to_dict(self) -> dict:
9
- return {"uri": self.uri.get_secret_value()}
12
+ return {
13
+ "uri": self.uri.get_secret_value(),
14
+ "user": self.user if self.user else None,
15
+ "password": self.password.get_secret_value() if self.password else None,
16
+ }
17
+
18
+ @validator("*")
19
+ def not_empty_field(cls, v: any, field: any):
20
+ if (
21
+ field.name in cls.common_short_configs()
22
+ or field.name in cls.common_long_configs()
23
+ or field.name in ["user", "password"]
24
+ ):
25
+ return v
26
+ if isinstance(v, str | SecretStr) and len(v) == 0:
27
+ raise ValueError("Empty string!")
28
+ return v
10
29
 
11
30
 
12
31
  class MilvusIndexConfig(BaseModel):
@@ -14,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
14
33
 
15
34
  index: IndexType
16
35
  metric_type: MetricType | None = None
17
-
36
+
18
37
  @property
19
38
  def is_gpu_index(self) -> bool:
20
- return self.index in [IndexType.GPU_CAGRA, IndexType.GPU_IVF_FLAT, IndexType.GPU_IVF_PQ]
39
+ return self.index in [
40
+ IndexType.GPU_CAGRA,
41
+ IndexType.GPU_IVF_FLAT,
42
+ IndexType.GPU_IVF_PQ,
43
+ ]
21
44
 
22
45
  def parse_metric(self) -> str:
23
46
  if not self.metric_type:
@@ -99,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
99
122
  "metric_type": self.parse_metric(),
100
123
  "params": {"nprobe": self.nprobe},
101
124
  }
102
-
125
+
126
+
103
127
  class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
104
128
  nlist: int
105
129
  nprobe: int | None = None
@@ -196,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
196
220
  search_width: int = 4
197
221
  min_iterations: int = 0
198
222
  max_iterations: int = 0
199
- build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
223
+ build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
200
224
  cache_dataset_on_device: str
201
225
  refine_ratio: float | None = None
202
226
  index: IndexType = IndexType.GPU_CAGRA
@@ -2,19 +2,18 @@
2
2
 
3
3
  import logging
4
4
  import time
5
+ from collections.abc import Iterable
5
6
  from contextlib import contextmanager
6
- from typing import Iterable
7
7
 
8
- from pymilvus import Collection, utility
9
- from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
8
+ from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
10
9
 
11
- from ..api import VectorDB, IndexType
10
+ from ..api import VectorDB
12
11
  from .config import MilvusIndexConfig
13
12
 
14
-
15
13
  log = logging.getLogger(__name__)
16
14
 
17
- MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
15
+ MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
16
+
18
17
 
19
18
  class Milvus(VectorDB):
20
19
  def __init__(
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
32
31
  self.db_config = db_config
33
32
  self.case_config = db_case_config
34
33
  self.collection_name = collection_name
35
- self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
34
+ self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
36
35
 
37
36
  self._primary_field = "pk"
38
37
  self._scalar_field = "id"
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
40
39
  self._index_name = "vector_idx"
41
40
 
42
41
  from pymilvus import connections
42
+
43
43
  connections.connect(**self.db_config, timeout=30)
44
44
  if drop_old and utility.has_collection(self.collection_name):
45
45
  log.info(f"{self.name} client drop_old collection: {self.collection_name}")
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
49
49
  fields = [
50
50
  FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
51
51
  FieldSchema(self._scalar_field, DataType.INT64),
52
- FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
52
+ FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
53
53
  ]
54
54
 
55
55
  log.info(f"{self.name} create collection: {self.collection_name}")
@@ -66,8 +66,7 @@ class Milvus(VectorDB):
66
66
  self.case_config.index_param(),
67
67
  index_name=self._index_name,
68
68
  )
69
- if kwargs.get("pre_load") is True:
70
- self._pre_load(col)
69
+ col.load()
71
70
 
72
71
  connections.disconnect("default")
73
72
 
@@ -80,6 +79,7 @@ class Milvus(VectorDB):
80
79
  >>> self.search_embedding()
81
80
  """
82
81
  from pymilvus import connections
82
+
83
83
  self.col: Collection | None = None
84
84
 
85
85
  connections.connect(**self.db_config, timeout=60)
@@ -90,16 +90,15 @@ class Milvus(VectorDB):
90
90
  connections.disconnect("default")
91
91
 
92
92
  def _optimize(self):
93
- self._post_insert()
94
93
  log.info(f"{self.name} optimizing before search")
94
+ self._post_insert()
95
95
  try:
96
- self.col.load()
96
+ self.col.load(refresh=True)
97
97
  except Exception as e:
98
98
  log.warning(f"{self.name} optimize error: {e}")
99
99
  raise e from None
100
100
 
101
101
  def _post_insert(self):
102
- log.info(f"{self.name} post insert before optimize")
103
102
  try:
104
103
  self.col.flush()
105
104
  # wait for index done and load refresh
@@ -110,6 +109,7 @@ class Milvus(VectorDB):
110
109
  )
111
110
 
112
111
  utility.wait_for_index_building_complete(self.collection_name)
112
+
113
113
  def wait_index():
114
114
  while True:
115
115
  progress = utility.index_building_progress(self.collection_name)
@@ -122,18 +122,17 @@ class Milvus(VectorDB):
122
122
  # Skip compaction if use GPU indexType
123
123
  if self.case_config.is_gpu_index:
124
124
  log.debug("skip compaction for gpu index type.")
125
- else :
125
+ else:
126
126
  try:
127
127
  self.col.compact()
128
128
  self.col.wait_for_compaction_completed()
129
129
  except Exception as e:
130
130
  log.warning(f"{self.name} compact error: {e}")
131
- if hasattr(e, 'code'):
132
- if e.code().name == 'PERMISSION_DENIED':
133
- log.warning(f"Skip compact due to permission denied.")
134
- pass
131
+ if hasattr(e, "code"):
132
+ if e.code().name == "PERMISSION_DENIED":
133
+ log.warning("Skip compact due to permission denied.")
135
134
  else:
136
- raise e
135
+ raise e from e
137
136
  wait_index()
138
137
  except Exception as e:
139
138
  log.warning(f"{self.name} optimize error: {e}")
@@ -158,7 +157,6 @@ class Milvus(VectorDB):
158
157
  log.warning(f"{self.name} pre load error: {e}")
159
158
  raise e from None
160
159
 
161
-
162
160
  def optimize(self):
163
161
  assert self.col, "Please call self.init() before"
164
162
  self._optimize()
@@ -166,7 +164,7 @@ class Milvus(VectorDB):
166
164
  def need_normalize_cosine(self) -> bool:
167
165
  """Wheather this database need to normalize dataset to support COSINE"""
168
166
  if self.case_config.is_gpu_index:
169
- log.info(f"current gpu_index only supports IP / L2, cosine dataset need normalize.")
167
+ log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
170
168
  return True
171
169
 
172
170
  return False
@@ -186,9 +184,9 @@ class Milvus(VectorDB):
186
184
  for batch_start_offset in range(0, len(embeddings), self.batch_size):
187
185
  batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
188
186
  insert_data = [
189
- metadata[batch_start_offset : batch_end_offset],
190
- metadata[batch_start_offset : batch_end_offset],
191
- embeddings[batch_start_offset : batch_end_offset],
187
+ metadata[batch_start_offset:batch_end_offset],
188
+ metadata[batch_start_offset:batch_end_offset],
189
+ embeddings[batch_start_offset:batch_end_offset],
192
190
  ]
193
191
  res = self.col.insert(insert_data)
194
192
  insert_count += len(res.primary_keys)
@@ -219,5 +217,4 @@ class Milvus(VectorDB):
219
217
  )
220
218
 
221
219
  # Organize results.
222
- ret = [result.id for result in res[0]]
223
- return ret
220
+ return [result.id for result in res[0]]
@@ -1,57 +1,63 @@
1
- import click
2
1
  import os
2
+ from typing import Annotated, Unpack
3
+
4
+ import click
3
5
  from pydantic import SecretStr
4
6
 
7
+ from vectordb_bench.backend.clients import DB
8
+
5
9
  from ....cli.cli import (
6
10
  CommonTypedDict,
7
11
  cli,
8
12
  click_parameter_decorators_from_typed_dict,
9
13
  run,
10
14
  )
11
- from typing import Annotated, Optional, Unpack
12
- from vectordb_bench.backend.clients import DB
13
15
 
14
16
 
15
17
  class PgDiskAnnTypedDict(CommonTypedDict):
16
18
  user_name: Annotated[
17
- str, click.option("--user-name", type=str, help="Db username", required=True)
19
+ str,
20
+ click.option("--user-name", type=str, help="Db username", required=True),
18
21
  ]
19
22
  password: Annotated[
20
23
  str,
21
- click.option("--password",
22
- type=str,
23
- help="Postgres database password",
24
- default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
25
- show_default="$POSTGRES_PASSWORD",
26
- ),
24
+ click.option(
25
+ "--password",
26
+ type=str,
27
+ help="Postgres database password",
28
+ default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
29
+ show_default="$POSTGRES_PASSWORD",
30
+ ),
27
31
  ]
28
32
 
29
- host: Annotated[
30
- str, click.option("--host", type=str, help="Db host", required=True)
31
- ]
32
- db_name: Annotated[
33
- str, click.option("--db-name", type=str, help="Db name", required=True)
34
- ]
33
+ host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
34
+ db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
35
35
  max_neighbors: Annotated[
36
36
  int,
37
37
  click.option(
38
- "--max-neighbors", type=int, help="PgDiskAnn max neighbors",
38
+ "--max-neighbors",
39
+ type=int,
40
+ help="PgDiskAnn max neighbors",
39
41
  ),
40
42
  ]
41
43
  l_value_ib: Annotated[
42
44
  int,
43
45
  click.option(
44
- "--l-value-ib", type=int, help="PgDiskAnn l_value_ib",
46
+ "--l-value-ib",
47
+ type=int,
48
+ help="PgDiskAnn l_value_ib",
45
49
  ),
46
50
  ]
47
51
  l_value_is: Annotated[
48
52
  float,
49
53
  click.option(
50
- "--l-value-is", type=float, help="PgDiskAnn l_value_is",
54
+ "--l-value-is",
55
+ type=float,
56
+ help="PgDiskAnn l_value_is",
51
57
  ),
52
58
  ]
53
59
  maintenance_work_mem: Annotated[
54
- Optional[str],
60
+ str | None,
55
61
  click.option(
56
62
  "--maintenance-work-mem",
57
63
  type=str,
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
63
69
  ),
64
70
  ]
65
71
  max_parallel_workers: Annotated[
66
- Optional[int],
72
+ int | None,
67
73
  click.option(
68
74
  "--max-parallel-workers",
69
75
  type=int,
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
72
78
  ),
73
79
  ]
74
80
 
81
+
75
82
  @cli.command()
76
83
  @click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
77
84
  def PgDiskAnn(
@@ -96,4 +103,4 @@ def PgDiskAnn(
96
103
  maintenance_work_mem=parameters["maintenance_work_mem"],
97
104
  ),
98
105
  **parameters,
99
- )
106
+ )