vectordb-bench 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +55 -45
  5. vectordb_bench/backend/clients/__init__.py +75 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +111 -70
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +51 -34
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +5 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +41 -83
  26. vectordb_bench/backend/clients/milvus/config.py +18 -8
  27. vectordb_bench/backend/clients/milvus/milvus.py +18 -19
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +30 -19
  62. vectordb_bench/backend/runner/read_write_runner.py +51 -23
  63. vectordb_bench/backend/runner/serial_runner.py +91 -48
  64. vectordb_bench/backend/runner/util.py +4 -3
  65. vectordb_bench/backend/task_runner.py +92 -72
  66. vectordb_bench/backend/utils.py +17 -10
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +22 -15
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.19.dist-info/RECORD +0 -135
  103. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
- from typing import Annotated, TypedDict, Unpack, Optional
1
+ from typing import Annotated, TypedDict, Unpack
2
2
 
3
3
  import click
4
4
  from pydantic import SecretStr
5
5
 
6
+ from vectordb_bench.backend.clients import DB
6
7
  from vectordb_bench.cli.cli import (
7
8
  CommonTypedDict,
8
9
  HNSWFlavor3,
@@ -10,33 +11,33 @@ from vectordb_bench.cli.cli import (
10
11
  cli,
11
12
  click_parameter_decorators_from_typed_dict,
12
13
  run,
13
-
14
14
  )
15
- from vectordb_bench.backend.clients import DB
16
15
 
17
16
  DBTYPE = DB.Milvus
18
17
 
19
18
 
20
19
  class MilvusTypedDict(TypedDict):
21
20
  uri: Annotated[
22
- str, click.option("--uri", type=str, help="uri connection string", required=True)
21
+ str,
22
+ click.option("--uri", type=str, help="uri connection string", required=True),
23
23
  ]
24
24
  user_name: Annotated[
25
- Optional[str], click.option("--user-name", type=str, help="Db username", required=False)
25
+ str | None,
26
+ click.option("--user-name", type=str, help="Db username", required=False),
26
27
  ]
27
28
  password: Annotated[
28
- Optional[str], click.option("--password", type=str, help="Db password", required=False)
29
+ str | None,
30
+ click.option("--password", type=str, help="Db password", required=False),
29
31
  ]
30
32
 
31
33
 
32
- class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict):
33
- ...
34
+ class MilvusAutoIndexTypedDict(CommonTypedDict, MilvusTypedDict): ...
34
35
 
35
36
 
36
37
  @cli.command()
37
38
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
38
39
  def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
39
- from .config import MilvusConfig, AutoIndexConfig
40
+ from .config import AutoIndexConfig, MilvusConfig
40
41
 
41
42
  run(
42
43
  db=DBTYPE,
@@ -54,7 +55,7 @@ def MilvusAutoIndex(**parameters: Unpack[MilvusAutoIndexTypedDict]):
54
55
  @cli.command()
55
56
  @click_parameter_decorators_from_typed_dict(MilvusAutoIndexTypedDict)
56
57
  def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
57
- from .config import MilvusConfig, FLATConfig
58
+ from .config import FLATConfig, MilvusConfig
58
59
 
59
60
  run(
60
61
  db=DBTYPE,
@@ -69,14 +70,13 @@ def MilvusFlat(**parameters: Unpack[MilvusAutoIndexTypedDict]):
69
70
  )
70
71
 
71
72
 
72
- class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3):
73
- ...
73
+ class MilvusHNSWTypedDict(CommonTypedDict, MilvusTypedDict, HNSWFlavor3): ...
74
74
 
75
75
 
76
76
  @cli.command()
77
77
  @click_parameter_decorators_from_typed_dict(MilvusHNSWTypedDict)
78
78
  def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
79
- from .config import MilvusConfig, HNSWConfig
79
+ from .config import HNSWConfig, MilvusConfig
80
80
 
81
81
  run(
82
82
  db=DBTYPE,
@@ -95,14 +95,13 @@ def MilvusHNSW(**parameters: Unpack[MilvusHNSWTypedDict]):
95
95
  )
96
96
 
97
97
 
98
- class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN):
99
- ...
98
+ class MilvusIVFFlatTypedDict(CommonTypedDict, MilvusTypedDict, IVFFlatTypedDictN): ...
100
99
 
101
100
 
102
101
  @cli.command()
103
102
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
104
103
  def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
105
- from .config import MilvusConfig, IVFFlatConfig
104
+ from .config import IVFFlatConfig, MilvusConfig
106
105
 
107
106
  run(
108
107
  db=DBTYPE,
@@ -123,7 +122,7 @@ def MilvusIVFFlat(**parameters: Unpack[MilvusIVFFlatTypedDict]):
123
122
  @cli.command()
124
123
  @click_parameter_decorators_from_typed_dict(MilvusIVFFlatTypedDict)
125
124
  def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
126
- from .config import MilvusConfig, IVFSQ8Config
125
+ from .config import IVFSQ8Config, MilvusConfig
127
126
 
128
127
  run(
129
128
  db=DBTYPE,
@@ -142,17 +141,13 @@ def MilvusIVFSQ8(**parameters: Unpack[MilvusIVFFlatTypedDict]):
142
141
 
143
142
 
144
143
  class MilvusDISKANNTypedDict(CommonTypedDict, MilvusTypedDict):
145
- search_list: Annotated[
146
- str, click.option("--search-list",
147
- type=int,
148
- required=True)
149
- ]
144
+ search_list: Annotated[str, click.option("--search-list", type=int, required=True)]
150
145
 
151
146
 
152
147
  @cli.command()
153
148
  @click_parameter_decorators_from_typed_dict(MilvusDISKANNTypedDict)
154
149
  def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
155
- from .config import MilvusConfig, DISKANNConfig
150
+ from .config import DISKANNConfig, MilvusConfig
156
151
 
157
152
  run(
158
153
  db=DBTYPE,
@@ -171,21 +166,16 @@ def MilvusDISKANN(**parameters: Unpack[MilvusDISKANNTypedDict]):
171
166
 
172
167
  class MilvusGPUIVFTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict):
173
168
  cache_dataset_on_device: Annotated[
174
- str, click.option("--cache-dataset-on-device",
175
- type=str,
176
- required=True)
177
- ]
178
- refine_ratio: Annotated[
179
- str, click.option("--refine-ratio",
180
- type=float,
181
- required=True)
169
+ str,
170
+ click.option("--cache-dataset-on-device", type=str, required=True),
182
171
  ]
172
+ refine_ratio: Annotated[str, click.option("--refine-ratio", type=float, required=True)]
183
173
 
184
174
 
185
175
  @cli.command()
186
176
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFTypedDict)
187
177
  def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
188
- from .config import MilvusConfig, GPUIVFFlatConfig
178
+ from .config import GPUIVFFlatConfig, MilvusConfig
189
179
 
190
180
  run(
191
181
  db=DBTYPE,
@@ -205,23 +195,20 @@ def MilvusGPUIVFFlat(**parameters: Unpack[MilvusGPUIVFTypedDict]):
205
195
  )
206
196
 
207
197
 
208
- class MilvusGPUIVFPQTypedDict(CommonTypedDict, MilvusTypedDict, MilvusIVFFlatTypedDict, MilvusGPUIVFTypedDict):
209
- m: Annotated[
210
- str, click.option("--m",
211
- type=int, help="hnsw m",
212
- required=True)
213
- ]
214
- nbits: Annotated[
215
- str, click.option("--nbits",
216
- type=int,
217
- required=True)
218
- ]
198
+ class MilvusGPUIVFPQTypedDict(
199
+ CommonTypedDict,
200
+ MilvusTypedDict,
201
+ MilvusIVFFlatTypedDict,
202
+ MilvusGPUIVFTypedDict,
203
+ ):
204
+ m: Annotated[str, click.option("--m", type=int, help="hnsw m", required=True)]
205
+ nbits: Annotated[str, click.option("--nbits", type=int, required=True)]
219
206
 
220
207
 
221
208
  @cli.command()
222
209
  @click_parameter_decorators_from_typed_dict(MilvusGPUIVFPQTypedDict)
223
210
  def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
224
- from .config import MilvusConfig, GPUIVFPQConfig
211
+ from .config import GPUIVFPQConfig, MilvusConfig
225
212
 
226
213
  run(
227
214
  db=DBTYPE,
@@ -245,51 +232,22 @@ def MilvusGPUIVFPQ(**parameters: Unpack[MilvusGPUIVFPQTypedDict]):
245
232
 
246
233
  class MilvusGPUCAGRATypedDict(CommonTypedDict, MilvusTypedDict, MilvusGPUIVFTypedDict):
247
234
  intermediate_graph_degree: Annotated[
248
- str, click.option("--intermediate-graph-degree",
249
- type=int,
250
- required=True)
251
- ]
252
- graph_degree: Annotated[
253
- str, click.option("--graph-degree",
254
- type=int,
255
- required=True)
256
- ]
257
- build_algo: Annotated[
258
- str, click.option("--build_algo",
259
- type=str,
260
- required=True)
261
- ]
262
- team_size: Annotated[
263
- str, click.option("--team-size",
264
- type=int,
265
- required=True)
266
- ]
267
- search_width: Annotated[
268
- str, click.option("--search-width",
269
- type=int,
270
- required=True)
271
- ]
272
- itopk_size: Annotated[
273
- str, click.option("--itopk-size",
274
- type=int,
275
- required=True)
276
- ]
277
- min_iterations: Annotated[
278
- str, click.option("--min-iterations",
279
- type=int,
280
- required=True)
281
- ]
282
- max_iterations: Annotated[
283
- str, click.option("--max-iterations",
284
- type=int,
285
- required=True)
235
+ str,
236
+ click.option("--intermediate-graph-degree", type=int, required=True),
286
237
  ]
238
+ graph_degree: Annotated[str, click.option("--graph-degree", type=int, required=True)]
239
+ build_algo: Annotated[str, click.option("--build_algo", type=str, required=True)]
240
+ team_size: Annotated[str, click.option("--team-size", type=int, required=True)]
241
+ search_width: Annotated[str, click.option("--search-width", type=int, required=True)]
242
+ itopk_size: Annotated[str, click.option("--itopk-size", type=int, required=True)]
243
+ min_iterations: Annotated[str, click.option("--min-iterations", type=int, required=True)]
244
+ max_iterations: Annotated[str, click.option("--max-iterations", type=int, required=True)]
287
245
 
288
246
 
289
247
  @cli.command()
290
248
  @click_parameter_decorators_from_typed_dict(MilvusGPUCAGRATypedDict)
291
249
  def MilvusGPUCAGRA(**parameters: Unpack[MilvusGPUCAGRATypedDict]):
292
- from .config import MilvusConfig, GPUCAGRAConfig
250
+ from .config import GPUCAGRAConfig, MilvusConfig
293
251
 
294
252
  run(
295
253
  db=DBTYPE,
@@ -1,5 +1,6 @@
1
1
  from pydantic import BaseModel, SecretStr, validator
2
- from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
2
+
3
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
3
4
 
4
5
 
5
6
  class MilvusConfig(DBConfig):
@@ -15,10 +16,14 @@ class MilvusConfig(DBConfig):
15
16
  }
16
17
 
17
18
  @validator("*")
18
- def not_empty_field(cls, v, field):
19
- if field.name in cls.common_short_configs() or field.name in cls.common_long_configs() or field.name in ["user", "password"]:
19
+ def not_empty_field(cls, v: any, field: any):
20
+ if (
21
+ field.name in cls.common_short_configs()
22
+ or field.name in cls.common_long_configs()
23
+ or field.name in ["user", "password"]
24
+ ):
20
25
  return v
21
- if isinstance(v, (str, SecretStr)) and len(v) == 0:
26
+ if isinstance(v, str | SecretStr) and len(v) == 0:
22
27
  raise ValueError("Empty string!")
23
28
  return v
24
29
 
@@ -28,10 +33,14 @@ class MilvusIndexConfig(BaseModel):
28
33
 
29
34
  index: IndexType
30
35
  metric_type: MetricType | None = None
31
-
36
+
32
37
  @property
33
38
  def is_gpu_index(self) -> bool:
34
- return self.index in [IndexType.GPU_CAGRA, IndexType.GPU_IVF_FLAT, IndexType.GPU_IVF_PQ]
39
+ return self.index in [
40
+ IndexType.GPU_CAGRA,
41
+ IndexType.GPU_IVF_FLAT,
42
+ IndexType.GPU_IVF_PQ,
43
+ ]
35
44
 
36
45
  def parse_metric(self) -> str:
37
46
  if not self.metric_type:
@@ -113,7 +122,8 @@ class IVFFlatConfig(MilvusIndexConfig, DBCaseConfig):
113
122
  "metric_type": self.parse_metric(),
114
123
  "params": {"nprobe": self.nprobe},
115
124
  }
116
-
125
+
126
+
117
127
  class IVFSQ8Config(MilvusIndexConfig, DBCaseConfig):
118
128
  nlist: int
119
129
  nprobe: int | None = None
@@ -210,7 +220,7 @@ class GPUCAGRAConfig(MilvusIndexConfig, DBCaseConfig):
210
220
  search_width: int = 4
211
221
  min_iterations: int = 0
212
222
  max_iterations: int = 0
213
- build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
223
+ build_algo: str = "IVF_PQ" # IVF_PQ; NN_DESCENT;
214
224
  cache_dataset_on_device: str
215
225
  refine_ratio: float | None = None
216
226
  index: IndexType = IndexType.GPU_CAGRA
@@ -2,19 +2,18 @@
2
2
 
3
3
  import logging
4
4
  import time
5
+ from collections.abc import Iterable
5
6
  from contextlib import contextmanager
6
- from typing import Iterable
7
7
 
8
- from pymilvus import Collection, utility
9
- from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusException
8
+ from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
10
9
 
11
10
  from ..api import VectorDB
12
11
  from .config import MilvusIndexConfig
13
12
 
14
-
15
13
  log = logging.getLogger(__name__)
16
14
 
17
- MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 *1024
15
+ MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
16
+
18
17
 
19
18
  class Milvus(VectorDB):
20
19
  def __init__(
@@ -32,7 +31,7 @@ class Milvus(VectorDB):
32
31
  self.db_config = db_config
33
32
  self.case_config = db_case_config
34
33
  self.collection_name = collection_name
35
- self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim *4))
34
+ self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
36
35
 
37
36
  self._primary_field = "pk"
38
37
  self._scalar_field = "id"
@@ -40,6 +39,7 @@ class Milvus(VectorDB):
40
39
  self._index_name = "vector_idx"
41
40
 
42
41
  from pymilvus import connections
42
+
43
43
  connections.connect(**self.db_config, timeout=30)
44
44
  if drop_old and utility.has_collection(self.collection_name):
45
45
  log.info(f"{self.name} client drop_old collection: {self.collection_name}")
@@ -49,7 +49,7 @@ class Milvus(VectorDB):
49
49
  fields = [
50
50
  FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
51
51
  FieldSchema(self._scalar_field, DataType.INT64),
52
- FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim)
52
+ FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
53
53
  ]
54
54
 
55
55
  log.info(f"{self.name} create collection: {self.collection_name}")
@@ -79,6 +79,7 @@ class Milvus(VectorDB):
79
79
  >>> self.search_embedding()
80
80
  """
81
81
  from pymilvus import connections
82
+
82
83
  self.col: Collection | None = None
83
84
 
84
85
  connections.connect(**self.db_config, timeout=60)
@@ -108,6 +109,7 @@ class Milvus(VectorDB):
108
109
  )
109
110
 
110
111
  utility.wait_for_index_building_complete(self.collection_name)
112
+
111
113
  def wait_index():
112
114
  while True:
113
115
  progress = utility.index_building_progress(self.collection_name)
@@ -120,18 +122,17 @@ class Milvus(VectorDB):
120
122
  # Skip compaction if use GPU indexType
121
123
  if self.case_config.is_gpu_index:
122
124
  log.debug("skip compaction for gpu index type.")
123
- else :
125
+ else:
124
126
  try:
125
127
  self.col.compact()
126
128
  self.col.wait_for_compaction_completed()
127
129
  except Exception as e:
128
130
  log.warning(f"{self.name} compact error: {e}")
129
- if hasattr(e, 'code'):
130
- if e.code().name == 'PERMISSION_DENIED':
131
+ if hasattr(e, "code"):
132
+ if e.code().name == "PERMISSION_DENIED":
131
133
  log.warning("Skip compact due to permission denied.")
132
- pass
133
134
  else:
134
- raise e
135
+ raise e from e
135
136
  wait_index()
136
137
  except Exception as e:
137
138
  log.warning(f"{self.name} optimize error: {e}")
@@ -156,7 +157,6 @@ class Milvus(VectorDB):
156
157
  log.warning(f"{self.name} pre load error: {e}")
157
158
  raise e from None
158
159
 
159
-
160
160
  def optimize(self):
161
161
  assert self.col, "Please call self.init() before"
162
162
  self._optimize()
@@ -164,7 +164,7 @@ class Milvus(VectorDB):
164
164
  def need_normalize_cosine(self) -> bool:
165
165
  """Wheather this database need to normalize dataset to support COSINE"""
166
166
  if self.case_config.is_gpu_index:
167
- log.info(f"current gpu_index only supports IP / L2, cosine dataset need normalize.")
167
+ log.info("current gpu_index only supports IP / L2, cosine dataset need normalize.")
168
168
  return True
169
169
 
170
170
  return False
@@ -184,9 +184,9 @@ class Milvus(VectorDB):
184
184
  for batch_start_offset in range(0, len(embeddings), self.batch_size):
185
185
  batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
186
186
  insert_data = [
187
- metadata[batch_start_offset : batch_end_offset],
188
- metadata[batch_start_offset : batch_end_offset],
189
- embeddings[batch_start_offset : batch_end_offset],
187
+ metadata[batch_start_offset:batch_end_offset],
188
+ metadata[batch_start_offset:batch_end_offset],
189
+ embeddings[batch_start_offset:batch_end_offset],
190
190
  ]
191
191
  res = self.col.insert(insert_data)
192
192
  insert_count += len(res.primary_keys)
@@ -217,5 +217,4 @@ class Milvus(VectorDB):
217
217
  )
218
218
 
219
219
  # Organize results.
220
- ret = [result.id for result in res[0]]
221
- return ret
220
+ return [result.id for result in res[0]]
@@ -1,57 +1,63 @@
1
- import click
2
1
  import os
2
+ from typing import Annotated, Unpack
3
+
4
+ import click
3
5
  from pydantic import SecretStr
4
6
 
7
+ from vectordb_bench.backend.clients import DB
8
+
5
9
  from ....cli.cli import (
6
10
  CommonTypedDict,
7
11
  cli,
8
12
  click_parameter_decorators_from_typed_dict,
9
13
  run,
10
14
  )
11
- from typing import Annotated, Optional, Unpack
12
- from vectordb_bench.backend.clients import DB
13
15
 
14
16
 
15
17
  class PgDiskAnnTypedDict(CommonTypedDict):
16
18
  user_name: Annotated[
17
- str, click.option("--user-name", type=str, help="Db username", required=True)
19
+ str,
20
+ click.option("--user-name", type=str, help="Db username", required=True),
18
21
  ]
19
22
  password: Annotated[
20
23
  str,
21
- click.option("--password",
22
- type=str,
23
- help="Postgres database password",
24
- default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
25
- show_default="$POSTGRES_PASSWORD",
26
- ),
24
+ click.option(
25
+ "--password",
26
+ type=str,
27
+ help="Postgres database password",
28
+ default=lambda: os.environ.get("POSTGRES_PASSWORD", ""),
29
+ show_default="$POSTGRES_PASSWORD",
30
+ ),
27
31
  ]
28
32
 
29
- host: Annotated[
30
- str, click.option("--host", type=str, help="Db host", required=True)
31
- ]
32
- db_name: Annotated[
33
- str, click.option("--db-name", type=str, help="Db name", required=True)
34
- ]
33
+ host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
34
+ db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
35
35
  max_neighbors: Annotated[
36
36
  int,
37
37
  click.option(
38
- "--max-neighbors", type=int, help="PgDiskAnn max neighbors",
38
+ "--max-neighbors",
39
+ type=int,
40
+ help="PgDiskAnn max neighbors",
39
41
  ),
40
42
  ]
41
43
  l_value_ib: Annotated[
42
44
  int,
43
45
  click.option(
44
- "--l-value-ib", type=int, help="PgDiskAnn l_value_ib",
46
+ "--l-value-ib",
47
+ type=int,
48
+ help="PgDiskAnn l_value_ib",
45
49
  ),
46
50
  ]
47
51
  l_value_is: Annotated[
48
52
  float,
49
53
  click.option(
50
- "--l-value-is", type=float, help="PgDiskAnn l_value_is",
54
+ "--l-value-is",
55
+ type=float,
56
+ help="PgDiskAnn l_value_is",
51
57
  ),
52
58
  ]
53
59
  maintenance_work_mem: Annotated[
54
- Optional[str],
60
+ str | None,
55
61
  click.option(
56
62
  "--maintenance-work-mem",
57
63
  type=str,
@@ -63,7 +69,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
63
69
  ),
64
70
  ]
65
71
  max_parallel_workers: Annotated[
66
- Optional[int],
72
+ int | None,
67
73
  click.option(
68
74
  "--max-parallel-workers",
69
75
  type=int,
@@ -72,6 +78,7 @@ class PgDiskAnnTypedDict(CommonTypedDict):
72
78
  ),
73
79
  ]
74
80
 
81
+
75
82
  @cli.command()
76
83
  @click_parameter_decorators_from_typed_dict(PgDiskAnnTypedDict)
77
84
  def PgDiskAnn(
@@ -96,4 +103,4 @@ def PgDiskAnn(
96
103
  maintenance_work_mem=parameters["maintenance_work_mem"],
97
104
  ),
98
105
  **parameters,
99
- )
106
+ )
@@ -1,7 +1,9 @@
1
1
  from abc import abstractmethod
2
- from typing import Any, Mapping, Optional, Sequence, TypedDict
2
+ from collections.abc import Mapping, Sequence
3
+ from typing import Any, LiteralString, TypedDict
4
+
3
5
  from pydantic import BaseModel, SecretStr
4
- from typing_extensions import LiteralString
6
+
5
7
  from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
6
8
 
7
9
  POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
@@ -9,7 +11,7 @@ POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
9
11
 
10
12
  class PgDiskANNConfigDict(TypedDict):
11
13
  """These keys will be directly used as kwargs in psycopg connection string,
12
- so the names must match exactly psycopg API"""
14
+ so the names must match exactly psycopg API"""
13
15
 
14
16
  user: str
15
17
  password: str
@@ -41,44 +43,43 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
41
43
  metric_type: MetricType | None = None
42
44
  create_index_before_load: bool = False
43
45
  create_index_after_load: bool = True
44
- maintenance_work_mem: Optional[str]
45
- max_parallel_workers: Optional[int]
46
+ maintenance_work_mem: str | None
47
+ max_parallel_workers: int | None
46
48
 
47
49
  def parse_metric(self) -> str:
48
50
  if self.metric_type == MetricType.L2:
49
51
  return "vector_l2_ops"
50
- elif self.metric_type == MetricType.IP:
52
+ if self.metric_type == MetricType.IP:
51
53
  return "vector_ip_ops"
52
54
  return "vector_cosine_ops"
53
55
 
54
56
  def parse_metric_fun_op(self) -> LiteralString:
55
57
  if self.metric_type == MetricType.L2:
56
58
  return "<->"
57
- elif self.metric_type == MetricType.IP:
59
+ if self.metric_type == MetricType.IP:
58
60
  return "<#>"
59
61
  return "<=>"
60
62
 
61
63
  def parse_metric_fun_str(self) -> str:
62
64
  if self.metric_type == MetricType.L2:
63
65
  return "l2_distance"
64
- elif self.metric_type == MetricType.IP:
66
+ if self.metric_type == MetricType.IP:
65
67
  return "max_inner_product"
66
68
  return "cosine_distance"
67
-
69
+
68
70
  @abstractmethod
69
- def index_param(self) -> dict:
70
- ...
71
+ def index_param(self) -> dict: ...
71
72
 
72
73
  @abstractmethod
73
- def search_param(self) -> dict:
74
- ...
74
+ def search_param(self) -> dict: ...
75
75
 
76
76
  @abstractmethod
77
- def session_param(self) -> dict:
78
- ...
77
+ def session_param(self) -> dict: ...
79
78
 
80
79
  @staticmethod
81
- def _optionally_build_with_options(with_options: Mapping[str, Any]) -> Sequence[dict[str, Any]]:
80
+ def _optionally_build_with_options(
81
+ with_options: Mapping[str, Any],
82
+ ) -> Sequence[dict[str, Any]]:
82
83
  """Walk through mappings, creating a List of {key1 = value} pairs. That will be used to build a where clause"""
83
84
  options = []
84
85
  for option_name, value in with_options.items():
@@ -87,35 +88,36 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
87
88
  {
88
89
  "option_name": option_name,
89
90
  "val": str(value),
90
- }
91
+ },
91
92
  )
92
93
  return options
93
94
 
94
95
  @staticmethod
95
96
  def _optionally_build_set_options(
96
- set_mapping: Mapping[str, Any]
97
+ set_mapping: Mapping[str, Any],
97
98
  ) -> Sequence[dict[str, Any]]:
98
99
  """Walk through options, creating 'SET 'key1 = "value1";' list"""
99
100
  session_options = []
100
101
  for setting_name, value in set_mapping.items():
101
102
  if value:
102
103
  session_options.append(
103
- {"parameter": {
104
+ {
105
+ "parameter": {
104
106
  "setting_name": setting_name,
105
107
  "val": str(value),
106
108
  },
107
- }
109
+ },
108
110
  )
109
111
  return session_options
110
-
112
+
111
113
 
112
114
  class PgDiskANNImplConfig(PgDiskANNIndexConfig):
113
115
  index: IndexType = IndexType.DISKANN
114
116
  max_neighbors: int | None
115
117
  l_value_ib: int | None
116
118
  l_value_is: float | None
117
- maintenance_work_mem: Optional[str] = None
118
- max_parallel_workers: Optional[int] = None
119
+ maintenance_work_mem: str | None = None
120
+ max_parallel_workers: int | None = None
119
121
 
120
122
  def index_param(self) -> dict:
121
123
  return {
@@ -128,18 +130,19 @@ class PgDiskANNImplConfig(PgDiskANNIndexConfig):
128
130
  "maintenance_work_mem": self.maintenance_work_mem,
129
131
  "max_parallel_workers": self.max_parallel_workers,
130
132
  }
131
-
133
+
132
134
  def search_param(self) -> dict:
133
135
  return {
134
136
  "metric": self.parse_metric(),
135
137
  "metric_fun_op": self.parse_metric_fun_op(),
136
138
  }
137
-
139
+
138
140
  def session_param(self) -> dict:
139
141
  return {
140
142
  "diskann.l_value_is": self.l_value_is,
141
143
  }
142
-
144
+
145
+
143
146
  _pgdiskann_case_config = {
144
147
  IndexType.DISKANN: PgDiskANNImplConfig,
145
148
  }