vectordb-bench: vectordb_bench-0.0.19-py3-none-any.whl → vectordb_bench-0.0.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105):
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +55 -45
  5. vectordb_bench/backend/clients/__init__.py +85 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
  11. vectordb_bench/backend/clients/alloydb/cli.py +51 -34
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +13 -24
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +39 -40
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
  25. vectordb_bench/backend/clients/milvus/cli.py +41 -83
  26. vectordb_bench/backend/clients/milvus/config.py +18 -8
  27. vectordb_bench/backend/clients/milvus/milvus.py +19 -39
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +95 -62
  47. vectordb_bench/backend/clients/test/cli.py +2 -3
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +5 -9
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +18 -14
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +56 -23
  61. vectordb_bench/backend/runner/rate_runner.py +30 -19
  62. vectordb_bench/backend/runner/read_write_runner.py +46 -22
  63. vectordb_bench/backend/runner/serial_runner.py +81 -46
  64. vectordb_bench/backend/runner/util.py +4 -3
  65. vectordb_bench/backend/task_runner.py +92 -92
  66. vectordb_bench/backend/utils.py +17 -10
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +45 -24
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
  100. vectordb_bench-0.0.21.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.19.dist-info/RECORD +0 -135
  103. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
- from typing import Annotated, TypedDict, Unpack, Optional
1
+ from typing import Annotated, TypedDict, Unpack
2
2
 
3
3
  import click
4
4
  from pydantic import SecretStr
5
5
 
6
+ from vectordb_bench.backend.clients import DB
6
7
  from vectordb_bench.cli.cli import (
7
8
  CommonTypedDict,
8
9
  HNSWFlavor3,
@@ -10,33 +11,33 @@ from vectordb_bench.cli.cli import (
10
11
  cli,
11
12
  click_parameter_decorators_from_typed_dict,
12
13
  run,
13
-
14
14
  )
15
- from vectordb_bench.backend.clients import DB
16
15
 
17
16
  DBTYPE = DB.Milvus
18
17
 
19
18
 
20
19
  class MilvusTypedDict(TypedDict):
21
20
  uri: Annotated[
22
- str, click.option("--uri", type=str, help="uri connection string", required=True)
21
+ str,
22
+ click.option("--uri", type=str, help="uri connection string", required=True),
23
23
  ]
24
24
  user_name: Annotated[
25
- Optional[str], click.option("--user-name", type=str, help="Db username", required=False)
25
+ str | None,
26
+ click.option("--user-name", type=str, help="Db username", required=False),
26
27
  ]
27
28
  password: Annotated[
28
- Optional[str], click.option("--password", type=str, help="Db password", required=False)
29
+ str | None,
30
+ click.option("--password", type=str, help="Db password", required=False),
29
31
  ]
30
32
 
31
33
 
32
- class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
33
- ...
34
+ class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
34
35
 
35
36
 
36
37
  @cli.command()
37
38
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
38
39
  def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
39
- from .config import MilvusConfig, AutoIndexConfig
40
+ from .config import AutoIndexConfig, MilvusConfig
40
41
 
41
42
  run(
42
43
  db=DBTYPE,
@@ -54,7 +55,7 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
54
55
  @cli.command()
55
56
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
56
57
  def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
57
- from .config import MilvusConfig, FLATConfig
58
+ from .config import FLATConfig, MilvusConfig
58
59
 
59
60
  run(
60
61
  db=DBTYPE,
@@ -69,14 +70,13 @@ def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
69
70
  )
70
71
 
71
72
 
72
- class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
73
- ...
73
+ class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
74
74
 
75
75
 
76
76
  @cli.command()
77
77
  @click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
78
78
  def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
79
- from .config import MilvusConfig, HNSWConfig
79
+ from .config import HNSWConfig, MilvusConfig
80
80
 
81
81
  run(
82
82
  db=DBTYPE,
@@ -95,14 +95,13 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
95
95
  )
96
96
 
97
97
 
98
- class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
99
- ...
98
+ class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
100
99
 
101
100
 
102
101
  @cli.command()
103
102
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
104
103
  def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
105
- from .config import MilvusConfig, IVFFlatConfig
104
+ from .config import IVFFlatConfig, MilvusConfig
106
105
 
107
106
  run(
108
107
  db=DBTYPE,
@@ -123,7 +122,7 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
123
122
  @cli.command()
124
123
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
125
124
  def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
126
- from .config import MilvusConfig, IVFSQ8Config
125
+ from .config import IVFSQ8Config, MilvusConfig
127
126
 
128
127
  run(
129
128
  db=DBTYPE,
@@ -142,17 +141,13 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
142
141
 
143
142
 
144
143
  class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
145
- search_list: Annotated[
146
- str, click.option("--search-list",
147
- type=int,
148
- required=True)
149
- ]
144
+ search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
150
145
 
151
146
 
152
147
  @cli.command()
153
148
  @click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
154
149
  def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
155
- from .config import MilvusConfig, DISKANNConfig
150
+ from .config import DISKANNConfig, MilvusConfig
156
151
 
157
152
  run(
158
153
  db=DBTYPE,
@@ -171,21 +166,16 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
171
166
 
172
167
  class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
173
168
  cache_dataset_on_device: Annotated[
174
- str, click.option("--cache-dataset-on-device",
175
- type=str,
176
- required=True)
177
- ]
178
- refine_ratio: Annotated[
179
- str, click.option("--refine-ratio",
180
- type=float,
181
- required=True)
169
+ str,
170
+ click.option("--cache-dataset-on-device", type=str, required=True),
182
171
  ]
172
+ refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
183
173
 
184
174
 
185
175
  @cli.command()
186
176
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
187
177
  def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
188
- from .config import MilvusConfig, GPUIVFFlatConfig
178
+ from .config import GPUIVFFlatConfig, MilvusConfig
189
179
 
190
180
  run(
191
181
  db=DBTYPE,
@@ -205,23 +195,20 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
205
195
  )
206
196
 
207
197
 
208
- class MilvusGPUIVFPQTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict, MilvusGPUIVFTypedDict):
209
- m: Annotated[
210
- str, click.option("--m",
211
- type=int, help="hnsw m",
212
- required=True)
213
- ]
214
- nbits: Annotated[
215
- str, click.option("--nbits",
216
- type=int,
217
- required=True)
218
- ]
198
+ class MilvusGPUIVFPQTypedDict(
199
+ CommonTypedDict,
200
+ MilvusTypedDict,
201
+ MilvusIVFFlatTypedDict,
202
+ MilvusGPUIVFTypedDict,
203
+ ):
204
+ m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
205
+ nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
219
206
 
220
207
 
221
208
  @cli.command()
222
209
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
223
210
  def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
224
- from .config import MilvusConfig, GPUIVFPQConfig
211
+ from .config import GPUIVFPQConfig, MilvusConfig
225
212
 
226
213
  run(
227
214
  db=DBTYPE,
@@ -245,51 +232,22 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
245
232
 
246
233
  class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
247
234
  intermediate_graph_degree: Annotated[
248
- str, click.option("--intermediate-graph-degree",
249
- type=int,
250
- required=True)
251
- ]
252
- graph_degree: Annotated[
253
- str, click.option("--graph-degree",
254
- type=int,
255
- required=True)
256
- ]
257
- build_algo: Annotated[
258
- str, click.option("--build_algo",
259
- type=str,
260
- required=True)
261
- ]
262
- team_size: Annotated[
263
- str, click.option("--team-size",
264
- type=int,
265
- required=True)
266
- ]
267
- search_width: Annotated[
268
- str, click.option("--search-width",
269
- type=int,
270
- required=True)
271
- ]
272
- itopk_size: Annotated[
273
- str, click.option("--itopk-size",
274
- type=int,
275
- required=True)
276
- ]
277
- min_iterations: Annotated[
278
- str, click.option("--min-iterations",
279
- type=int,
280
- required=True)
281
- ]
282
- max_iterations: Annotated[
283
- str, click.option("--max-iterations",
284
- type=int,
285
- required=True)
235
+ str,
236
+ click.option("--intermediate-graph-degree", type=int, required=True),
286
237
  ]
238
+ graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
239
+ build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
240
+ team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
241
+ search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
242
+ itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
243
+ min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
244
+ max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
287
245
 
288
246
 
289
247
  @cli.command()
290
248
  @click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
291
249
  def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
292
- from .config import MilvusConfig, GPUCAGRAConfig
250
+ from .config import GPUCAGRAConfig, MilvusConfig
293
251
 
294
252
  run(
295
253
  db=DBTYPE,
@@ -1,5 +1,6 @@
1
1
  from pydantic import BaseModel, SecretStr, validator
2
- from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
2
+
3
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
3
4
 
4
5
 
5
6
  class MilvusConfig(DBConfig):
@@ -15,10 +16,14 @@ class MilvusConfig(DBConfig):
15
16
  }
16
17
 
17
18
  @validator("*")
18
- def not_empty_field(cls, v, field):
19
- if field.name in cls.common_short_configs() or field.name in cls.common_long_configs() or field.name in ["user", "password"]:
19
+ def not_empty_field(cls, v: any, field: any):
20
+ if (
21
+ field.name in cls.common_short_configs()
22
+ or field.name in cls.common_long_configs()
23
+ or field.name in ["user", "password"]
24
+ ):
20
25
  return v
21
- if isinstance(v, (str, SecretStr)) and len(v) == 0:
26
+ if isinstance(v, str | SecretStr) and len(v) == 0:
22
27
  raise ValueError("Empty string!")
23
28
  return v
24
29
 
@@ -28,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
28
33
 
29
34
  index: IndexType
30
35
  metric_type: MetricType | None = None
31
-
36
+
32
37
  @property
33
38
  def is_gpu_index(self) -> bool:
34
- return self.index in [IndexType.GPU_CAGRA, IndexType.GPU_IVF_FLAT, IndexType.GPU_IVF_PQ]
39
+ return self.index in [
40
+ IndexType.GPU_CAGRA,
41
+ IndexType.GPU_IVF_FLAT,
42
+ IndexType.GPU_IVF_PQ,
43
+ ]
35
44
 
36
45
  def parse_metric(self) -> str:
37
46
  if not self.metric_type:
@@ -113,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
113
122
  "metric_type": self.parse_metric(),
114
123
  "params": {"nprobe": self.nprobe},
115
124
  }
116
-
125
+
126
+
117
127
  class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
118
128
  nlist: int
119
129
  nprobe: int | None = None
@@ -210,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
210
220
  search_width: int = 4
211
221
  min_iterations: int = 0
212
222
  max_iterations: int = 0
213
- build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
223
+ build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
214
224
  cache_dataset_on_device: str
215
225
  refine_ratio: float | None = None
216
226
  index: IndexType = IndexType.GPU_CAGRA
@@ -2,19 +2,18 @@
2
2
 
3
3
  import logging
4
4
  import time
5
+ from collections.abc import Iterable
5
6
  from contextlib import contextmanager
6
- from typing import Iterable
7
7
 
8
- from pymilvus import Collection, utility
9
- from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
8
+ from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
10
9
 
11
10
  from ..api import VectorDB
12
11
  from .config import MilvusIndexConfig
13
12
 
14
-
15
13
  log = logging.getLogger(__name__)
16
14
 
17
- MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
15
+ MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
16
+
18
17
 
19
18
  class Milvus(VectorDB):
20
19
  def __init__(
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
32
31
  self.db_config = db_config
33
32
  self.case_config = db_case_config
34
33
  self.collection_name = collection_name
35
- self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
34
+ self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
36
35
 
37
36
  self._primary_field = "pk"
38
37
  self._scalar_field = "id"
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
40
39
  self._index_name = "vector_idx"
41
40
 
42
41
  from pymilvus import connections
42
+
43
43
  connections.connect(**self.db_config, timeout=30)
44
44
  if drop_old and utility.has_collection(self.collection_name):
45
45
  log.info(f"{self.name} client drop_old collection: {self.collection_name}")
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
49
49
  fields = [
50
50
  FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
51
51
  FieldSchema(self._scalar_field, DataType.INT64),
52
- FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
52
+ FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
53
53
  ]
54
54
 
55
55
  log.info(f"{self.name} create collection: {self.collection_name}")
@@ -79,6 +79,7 @@ class Milvus(VectorDB):
79
79
  >>> self.search_embedding()
80
80
  """
81
81
  from pymilvus import connections
82
+
82
83
  self.col: Collection | None = None
83
84
 
84
85
  connections.connect(**self.db_config, timeout=60)
@@ -108,6 +109,7 @@ class Milvus(VectorDB):
108
109
  )
109
110
 
110
111
  utility.wait_for_index_building_complete(self.collection_name)
112
+
111
113
  def wait_index():
112
114
  while True:
113
115
  progress = utility.index_building_progress(self.collection_name)
@@ -120,51 +122,30 @@ class Milvus(VectorDB):
120
122
  # Skip compaction if use GPU indexType
121
123
  if self.case_config.is_gpu_index:
122
124
  log.debug("skip compaction for gpu index type.")
123
- else :
125
+ else:
124
126
  try:
125
127
  self.col.compact()
126
128
  self.col.wait_for_compaction_completed()
127
129
  except Exception as e:
128
130
  log.warning(f"{self.name} compact error: {e}")
129
- if hasattr(e, 'code'):
130
- if e.code().name == 'PERMISSION_DENIED':
131
+ if hasattr(e, "code"):
132
+ if e.code().name == "PERMISSION_DENIED":
131
133
  log.warning("Skip compact due to permission denied.")
132
- pass
133
134
  else:
134
- raise e
135
+ raise e from e
135
136
  wait_index()
136
137
  except Exception as e:
137
138
  log.warning(f"{self.name} optimize error: {e}")
138
139
  raise e from None
139
140
 
140
- def ready_to_load(self):
141
- assert self.col, "Please call self.init() before"
142
- self._pre_load(self.col)
143
-
144
- def _pre_load(self, coll: Collection):
145
- try:
146
- if not coll.has_index(index_name=self._index_name):
147
- log.info(f"{self.name} create index")
148
- coll.create_index(
149
- self._vector_field,
150
- self.case_config.index_param(),
151
- index_name=self._index_name,
152
- )
153
- coll.load()
154
- log.info(f"{self.name} load")
155
- except Exception as e:
156
- log.warning(f"{self.name} pre load error: {e}")
157
- raise e from None
158
-
159
-
160
- def optimize(self):
141
+ def optimize(self, data_size: int | None = None):
161
142
  assert self.col, "Please call self.init() before"
162
143
  self._optimize()
163
144
 
164
145
  def need_normalize_cosine(self) -> bool:
165
146
  """Wheather this database need to normalize dataset to support COSINE"""
166
147
  if self.case_config.is_gpu_index:
167
- log.info(f"current gpu_index only supports IP / L2, cosine dataset need normalize.")
148
+ log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
168
149
  return True
169
150
 
170
151
  return False
@@ -184,9 +165,9 @@ class Milvus(VectorDB):
184
165
  for batch_start_offset in range(0, len(embeddings), self.batch_size):
185
166
  batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
186
167
  insert_data = [
187
- metadata[batch_start_offset : batch_end_offset],
188
- metadata[batch_start_offset : batch_end_offset],
189
- embeddings[batch_start_offset : batch_end_offset],
168
+ metadata[batch_start_offset:batch_end_offset],
169
+ metadata[batch_start_offset:batch_end_offset],
170
+ embeddings[batch_start_offset:batch_end_offset],
190
171
  ]
191
172
  res = self.col.insert(insert_data)
192
173
  insert_count += len(res.primary_keys)
@@ -217,5 +198,4 @@ class Milvus(VectorDB):
217
198
  )
218
199
 
219
200
  # Organize results.
220
- ret = [result.id for result in res[0]]
221
- return ret
201
+ return [result.id for result in res[0]]
@@ -1,57 +1,63 @@
1
- import click
2
1
  import os
2
+ from typing import Annotated, Unpack
3
+
4
+ import click
3
5
  from pydantic import SecretStr
4
6
 
7
+ from vectordb_bench.backend.clients import DB
8
+
5
9
  from ....cli.cli import (
6
10
  CommonTypedDict,
7
11
  cli,
8
12
  click_parameter_decorators_from_typed_dict,
9
13
  run,
10
14
  )
11
- from typing import Annotated, Optional, Unpack
12
- from vectordb_bench.backend.clients import DB
13
15
 
14
16
 
15
17
  class PgDiskAnnTypedDict(CommonTypedDict):
16
18
  user_name: Annotated[
17
- str, click.option("--user-name", type=str, help="Db username", required=True)
19
+ str,
20
+ click.option("--user-name", type=str, help="Db username", required=True),
18
21
  ]
19
22
  password: Annotated[
20
23
  str,
21
- click.option("--password",
22
- type=str,
23
- help="Postgres database password",
24
- default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
25
- show_default="$POSTGRES_PASSWORD",
26
- ),
24
+ click.option(
25
+ "--password",
26
+ type=str,
27
+ help="Postgres database password",
28
+ default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
29
+ show_default="$POSTGRES_PASSWORD",
30
+ ),
27
31
  ]
28
32
 
29
- host: Annotated[
30
- str, click.option("--host", type=str, help="Db host", required=True)
31
- ]
32
- db_name: Annotated[
33
- str, click.option("--db-name", type=str, help="Db name", required=True)
34
- ]
33
+ host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
34
+ db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
35
35
  max_neighbors: Annotated[
36
36
  int,
37
37
  click.option(
38
- "--max-neighbors", type=int, help="PgDiskAnn max neighbors",
38
+ "--max-neighbors",
39
+ type=int,
40
+ help="PgDiskAnn max neighbors",
39
41
  ),
40
42
  ]
41
43
  l_value_ib: Annotated[
42
44
  int,
43
45
  click.option(
44
- "--l-value-ib", type=int, help="PgDiskAnn l_value_ib",
46
+ "--l-value-ib",
47
+ type=int,
48
+ help="PgDiskAnn l_value_ib",
45
49
  ),
46
50
  ]
47
51
  l_value_is: Annotated[
48
52
  float,
49
53
  click.option(
50
- "--l-value-is", type=float, help="PgDiskAnn l_value_is",
54
+ "--l-value-is",
55
+ type=float,
56
+ help="PgDiskAnn l_value_is",
51
57
  ),
52
58
  ]
53
59
  maintenance_work_mem: Annotated[
54
- Optional[str],
60
+ str | None,
55
61
  click.option(
56
62
  "--maintenance-work-mem",
57
63
  type=str,
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
63
69
  ),
64
70
  ]
65
71
  max_parallel_workers: Annotated[
66
- Optional[int],
72
+ int | None,
67
73
  click.option(
68
74
  "--max-parallel-workers",
69
75
  type=int,
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
72
78
  ),
73
79
  ]
74
80
 
81
+
75
82
  @cli.command()
76
83
  @click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
77
84
  def PgDiskAnn(
@@ -96,4 +103,4 @@ def PgDiskAnn(
96
103
  maintenance_work_mem=parameters["maintenance_work_mem"],
97
104
  ),
98
105
  **parameters,
99
- )
106
+ )