vectordb-bench 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. vectordb_bench/__init__.py +1 -0
  2. vectordb_bench/backend/clients/__init__.py +15 -0
  3. vectordb_bench/backend/clients/api.py +2 -0
  4. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +104 -40
  5. vectordb_bench/backend/clients/aws_opensearch/cli.py +52 -15
  6. vectordb_bench/backend/clients/aws_opensearch/config.py +27 -7
  7. vectordb_bench/backend/clients/hologres/cli.py +50 -0
  8. vectordb_bench/backend/clients/hologres/config.py +121 -0
  9. vectordb_bench/backend/clients/hologres/hologres.py +365 -0
  10. vectordb_bench/backend/clients/lancedb/lancedb.py +1 -0
  11. vectordb_bench/backend/clients/milvus/cli.py +25 -0
  12. vectordb_bench/backend/clients/milvus/config.py +2 -0
  13. vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  14. vectordb_bench/backend/clients/oceanbase/cli.py +1 -0
  15. vectordb_bench/backend/clients/oceanbase/config.py +3 -1
  16. vectordb_bench/backend/clients/oceanbase/oceanbase.py +20 -4
  17. vectordb_bench/backend/clients/pgdiskann/cli.py +45 -0
  18. vectordb_bench/backend/clients/pgdiskann/config.py +16 -0
  19. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +94 -26
  20. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -1
  21. vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -1
  22. vectordb_bench/backend/runner/rate_runner.py +23 -11
  23. vectordb_bench/cli/cli.py +36 -0
  24. vectordb_bench/cli/vectordbbench.py +2 -0
  25. vectordb_bench/frontend/config/dbCaseConfigs.py +82 -3
  26. vectordb_bench/frontend/config/styles.py +1 -0
  27. vectordb_bench/interface.py +5 -1
  28. vectordb_bench/models.py +3 -0
  29. vectordb_bench/results/getLeaderboardDataV2.py +23 -2
  30. vectordb_bench/results/leaderboard_v2.json +200 -0
  31. vectordb_bench/results/leaderboard_v2_streaming.json +128 -0
  32. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/METADATA +40 -8
  33. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/RECORD +37 -33
  34. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/WHEEL +0 -0
  35. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/entry_points.txt +0 -0
  36. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/licenses/LICENSE +0 -0
  37. {vectordb_bench-1.0.5.dist-info → vectordb_bench-1.0.7.dist-info}/top_level.txt +0 -0
vectordb_bench/__init__.py

@@ -16,6 +16,7 @@ class config:
  LOG_LEVEL = env.str("LOG_LEVEL", "INFO")

  DEFAULT_DATASET_URL = env.str("DEFAULT_DATASET_URL", AWS_S3_URL)
+ DATASET_SOURCE = env.str("DATASET_SOURCE", "S3") # Options "S3" or "AliyunOSS"
  DATASET_LOCAL_DIR = env.path("DATASET_LOCAL_DIR", "/tmp/vectordb_bench/dataset")
  NUM_PER_BATCH = env.int("NUM_PER_BATCH", 100)
  TIME_PER_BATCH = 1 # 1s. for streaming insertion.
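The new DATASET_SOURCE switch sits next to the other environs-backed settings. A minimal sketch of how it could be exercised, assuming the config class reads the variable at import time like its neighbours (the "AliyunOSS" value comes from the comment in the hunk above):

# Hedged sketch: select the dataset source before vectordb_bench is imported,
# since the config class above evaluates env.str(...) when the module loads.
import os

os.environ["DATASET_SOURCE"] = "AliyunOSS"  # default is "S3"

from vectordb_bench import config  # noqa: E402

print(config.DATASET_SOURCE)  # -> "AliyunOSS"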
vectordb_bench/backend/clients/__init__.py

@@ -50,6 +50,7 @@ class DB(Enum):
  LanceDB = "LanceDB"
  OceanBase = "OceanBase"
  S3Vectors = "S3Vectors"
+ Hologres = "Alibaba Cloud Hologres"

  @property
  def init_cls(self) -> type[VectorDB]:  # noqa: PLR0911, PLR0912, C901, PLR0915

@@ -194,6 +195,11 @@ class DB(Enum):

  return S3Vectors

+ if self == DB.Hologres:
+ from .hologres.hologres import Hologres
+
+ return Hologres
+
  msg = f"Unknown DB: {self.name}"
  raise ValueError(msg)

@@ -340,6 +346,11 @@ class DB(Enum):

  return S3VectorsConfig

+ if self == DB.Hologres:
+ from .hologres.config import HologresConfig
+
+ return HologresConfig
+
  msg = f"Unknown DB: {self.name}"
  raise ValueError(msg)

@@ -461,6 +472,10 @@ class DB(Enum):
  from .s3_vectors.config import S3VectorsIndexConfig

  return S3VectorsIndexConfig
+ if self == DB.Hologres:
+ from .hologres.config import HologresIndexConfig
+
+ return HologresIndexConfig

  # DB.Pinecone, DB.Chroma, DB.Redis
  return EmptyDBCaseConfig
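Taken together, these four hunks register Hologres end to end: a new enum value plus lazy imports for the client class, the connection config, and the case config. A minimal sketch of the lookup path, assuming the optional Hologres client dependencies are installed (only `init_cls` appears in this diff; the other two lookups sit behind the sibling properties, whose names are not shown here):

# Hedged sketch: resolving the new enum member added above.
from vectordb_bench.backend.clients import DB

db = DB("Alibaba Cloud Hologres")   # same member as DB.Hologres
client_cls = db.init_cls            # lazily imports .hologres.hologres.Hologres
print(db.name, client_cls.__name__)  # -> "Hologres Hologres"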
vectordb_bench/backend/clients/api.py

@@ -40,6 +40,8 @@ class IndexType(str, Enum):
  GPU_IVF_PQ = "GPU_IVF_PQ"
  GPU_CAGRA = "GPU_CAGRA"
  SCANN = "scann"
+ Hologres_HGraph = "HGraph"
+ Hologres_Graph = "Graph"
  NONE = "NONE"
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py

@@ -65,9 +65,7 @@ class AWSOpenSearch(VectorDB):
  self._load_graphs_to_memory(client)

  def _create_index(self, client: OpenSearch) -> None:
- ef_search_value = (
- self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
- )
+ ef_search_value = self.case_config.ef_search
  log.info(f"Creating index with ef_search: {ef_search_value}")
  log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}")
@@ -81,7 +79,7 @@ class AWSOpenSearch(VectorDB):
  "knn.memory.circuit_breaker.limit": self.case_config.cb_threshold,
  }
  }
- client.cluster.put_settings(cluster_settings_body)
+ client.cluster.put_settings(body=cluster_settings_body)
  settings = {
  "index": {
  "knn": True,
@@ -93,25 +91,83 @@ class AWSOpenSearch(VectorDB):
  "refresh_interval": self.case_config.refresh_interval,
  }
  settings["index"]["knn.algo_param.ef_search"] = ef_search_value
- mappings = {
- "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
- "properties": {
- self.id_col_name: {"type": "integer", "store": True},
- self.label_col_name: {"type": "keyword"},
- self.vector_col_name: {
- "type": "knn_vector",
- "dimension": self.dim,
- "method": self.case_config.index_param(),
- },
- },
+
+ # Get method configuration and log it for debugging
+ method_config = self.case_config.index_param()
+ log.info(f"Raw method config from index_param(): {method_config}")
+
+ # For s3vector engine, ensure method only contains engine field
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ method_config = {"engine": "s3vector"}
+ log.info(f"Cleaned method config for s3vector: {method_config}")
+
+ # Prepare vector field configuration
+ vector_field_config = {
+ "type": "knn_vector",
+ "store": True,
+ "dimension": self.dim,
+ "method": method_config,
  }
+
+ # For s3vector engine, space_type should be set at the vector field level
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ space_type = self.case_config.parse_metric()
+ vector_field_config["space_type"] = space_type
+
+ # Ensure method config is absolutely clean for s3vector - remove any potential extra fields
+ vector_field_config["method"] = {"engine": "s3vector"}
+
+ log.info(f"Setting space_type '{space_type}' at vector field level for s3vector engine")
+ log.info(f"Final vector field config for s3vector: {vector_field_config}")
+
+ # Configure mappings based on engine type
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ # For s3vector engine, use simplified mappings without _source configuration
+ mappings = {
+ "properties": {
+ # self.id_col_name: {"type": "integer", "store": True},
+ self.label_col_name: {"type": "keyword"},
+ self.vector_col_name: vector_field_config,
+ },
+ }
+ log.info("Using simplified mappings for s3vector engine (no _source configuration)")
+ else:
+ # For other engines (faiss, lucene), use standard mappings with _source configuration
+ mappings = {
+ "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
+ "properties": {
+ # self.id_col_name: {"type": "integer", "store": True},
+ self.label_col_name: {"type": "keyword"},
+ self.vector_col_name: vector_field_config,
+ },
+ }
+ log.info("Using standard mappings with _source configuration for non-s3vector engines")
  try:
  log.info(f"Creating index with settings: {settings}")
  log.info(f"Creating index with mappings: {mappings}")
+
+ # Additional logging for s3vector to confirm method config before sending
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ method_in_mappings = mappings["properties"][self.vector_col_name]["method"]
+ log.info(f"Final method config being sent to OpenSearch: {method_in_mappings}")
+
  client.indices.create(
  index=self.index_name,
  body={"settings": settings, "mappings": mappings},
  )
+
+ # For s3vector, verify the actual index configuration after creation
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ try:
+ actual_mapping = client.indices.get_mapping(index=self.index_name)
+ actual_method = actual_mapping[self.index_name]["mappings"]["properties"][self.vector_col_name][
+ "method"
+ ]
+ log.info(f"Actual method config in created index: {actual_method}")
+
+ except Exception as e:
+ log.warning(f"Failed to verify index configuration: {e}")
+
  except Exception as e:
  log.warning(f"Failed to create index: {self.index_name} error: {e!s}")
  raise e from None
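For reference, a hedged illustration of the mapping body this branch would produce for the s3vector engine. The field names and dimension below are placeholders, and the "l2" space_type follows the parse_metric() default shown later in this diff (config.py):

# Illustration only (placeholder field names "label"/"embedding", dim 768).
s3vector_mappings = {
    "properties": {
        "label": {"type": "keyword"},
        "embedding": {
            "type": "knn_vector",
            "store": True,
            "dimension": 768,
            "space_type": "l2",                 # set at the field level for s3vector
            "method": {"engine": "s3vector"},   # method carries only the engine
        },
    },
}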
@@ -153,12 +209,12 @@ class AWSOpenSearch(VectorDB):
  insert_data = []
  for i in range(len(embeddings)):
  index_data = {"index": {"_index": self.index_name, self.id_col_name: metadata[i]}}
- if self.with_scalar_labels and self.case_config.use_routing:
+ if self.with_scalar_labels and self.case_config.use_routing and labels_data is not None:
  index_data["routing"] = labels_data[i]
  insert_data.append(index_data)

  other_data = {self.vector_col_name: embeddings[i]}
- if self.with_scalar_labels:
+ if self.with_scalar_labels and labels_data is not None:
  other_data[self.label_col_name] = labels_data[i]
  insert_data.append(other_data)
@@ -168,7 +224,7 @@ class AWSOpenSearch(VectorDB):
  except Exception as e:
  log.warning(f"Failed to insert data: {self.index_name} error: {e!s}")
  time.sleep(10)
- return self._insert_with_single_client(embeddings, metadata)
+ return self._insert_with_single_client(embeddings, metadata, labels_data)

  def _insert_with_multiple_clients(
  self,
@@ -186,7 +242,8 @@ class AWSOpenSearch(VectorDB):

  for i in range(0, len(embeddings_list), chunk_size):
  end = min(i + chunk_size, len(embeddings_list))
- chunks.append((embeddings_list[i:end], metadata[i:end], labels_data[i:end]))
+ chunk_labels = labels_data[i:end] if labels_data is not None else None
+ chunks.append((embeddings_list[i:end], metadata[i:end], chunk_labels))

  clients = []
  for _ in range(min(num_clients, len(chunks))):
@@ -202,12 +259,12 @@ class AWSOpenSearch(VectorDB):
  insert_data = []
  for i in range(len(chunk_embeddings)):
  index_data = {"index": {"_index": self.index_name, self.id_col_name: chunk_metadata[i]}}
- if self.with_scalar_labels and self.case_config.use_routing:
+ if self.with_scalar_labels and self.case_config.use_routing and chunk_labels_data is not None:
  index_data["routing"] = chunk_labels_data[i]
  insert_data.append(index_data)

  other_data = {self.vector_col_name: chunk_embeddings[i]}
- if self.with_scalar_labels:
+ if self.with_scalar_labels and chunk_labels_data is not None:
  other_data[self.label_col_name] = chunk_labels_data[i]
  insert_data.append(other_data)
@@ -254,10 +311,7 @@ class AWSOpenSearch(VectorDB):
  return (total_count, None)

  def _update_ef_search_before_search(self, client: OpenSearch):
- ef_search_value = (
- self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
- )
-
+ ef_search_value = self.case_config.ef_search
  try:
  index_settings = client.indices.get_settings(index=self.index_name)
  current_ef_search = (
@@ -297,23 +351,33 @@ class AWSOpenSearch(VectorDB):
  """
  assert self.client is not None, "should self.init() first"

+ # Configure query based on engine type
+ if self.case_config.engine == AWSOS_Engine.s3vector:
+ # For s3vector engine, use simplified query without method_parameters
+ knn_query = {
+ "vector": query,
+ "k": k,
+ **({"filter": self.filter} if self.filter else {}),
+ }
+ log.debug("Using simplified knn query for s3vector engine (no method_parameters)")
+ else:
+ # For other engines (faiss, lucene), use standard query with method_parameters
+ knn_query = {
+ "vector": query,
+ "k": k,
+ "method_parameters": self.case_config.search_param(),
+ **({"filter": self.filter} if self.filter else {}),
+ **(
+ {"rescore": {"oversample_factor": self.case_config.oversample_factor}}
+ if self.case_config.use_quant
+ else {}
+ ),
+ }
+ log.debug("Using standard knn query with method_parameters for non-s3vector engines")
+
  body = {
  "size": k,
- "query": {
- "knn": {
- self.vector_col_name: {
- "vector": query,
- "k": k,
- "method_parameters": self.case_config.search_param(),
- **({"filter": self.filter} if self.filter else {}),
- **(
- {"rescore": {"oversample_factor": self.case_config.oversample_factor}}
- if self.case_config.use_quant
- else {}
- ),
- }
- }
- },
+ "query": {"knn": {self.vector_col_name: knn_query}},
  }

  try:
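The effect of this branch is easiest to see on the final request bodies. A hedged, side-by-side illustration with placeholder values (vector field "embedding", k=10, ef_search=100, no filter, no rescore):

# Illustration only: query body for the s3vector engine (no method_parameters)
s3vector_body = {
    "size": 10,
    "query": {"knn": {"embedding": {"vector": [0.1, 0.2, 0.3], "k": 10}}},
}

# Illustration only: query body for faiss/lucene (method_parameters included)
standard_body = {
    "size": 10,
    "query": {
        "knn": {
            "embedding": {
                "vector": [0.1, 0.2, 0.3],
                "k": 10,
                "method_parameters": {"ef_search": 100},
            }
        }
    },
}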
vectordb_bench/backend/clients/aws_opensearch/cli.py

@@ -46,7 +46,7 @@ class AWSOpenSearchTypedDict(TypedDict):
  str,
  click.option(
  "--engine",
- type=click.Choice(["nmslib", "faiss", "lucene"], case_sensitive=False),
+ type=click.Choice(["faiss", "lucene", "s3vector"], case_sensitive=False),
  help="HNSW algorithm implementation to use",
  default="faiss",
  ),
@@ -96,24 +96,44 @@ class AWSOpenSearchTypedDict(TypedDict):
  ),
  ]

- quantization_type: Annotated[
- str | None,
+ index_thread_qty_during_force_merge: Annotated[
+ int,
  click.option(
- "--quantization-type",
- type=click.Choice(["fp32", "fp16"]),
- help="quantization type for vectors (in index)",
- default="fp32",
+ "--index-thread-qty-during-force-merge",
+ type=int,
+ help="Thread count during force merge operations",
+ default=8,
+ ),
+ ]
+
+ number_of_indexing_clients: Annotated[
+ int,
+ click.option(
+ "--number-of-indexing-clients",
+ type=int,
+ help="Number of concurrent indexing clients",
+ default=1,
+ ),
+ ]
+
+ ef_construction: Annotated[
+ int | None,
+ click.option(
+ "--ef-construction",
+ type=int,
+ help="ef parameter for HNSW construction (not used for s3vector engine)",
+ default=None,
  required=False,
  ),
  ]

- engine: Annotated[
+ quantization_type: Annotated[
  str | None,
  click.option(
- "--engine",
- type=click.Choice(["faiss", "lucene"]),
+ "--quantization-type",
+ type=click.Choice(["fp32", "fp16"]),
  help="quantization type for vectors (in index)",
- default="faiss",
+ default="fp32",
  required=False,
  ),
  ]
@@ -127,6 +147,21 @@ class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFl
  def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
  from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig

+ # Set default values for HNSW parameters if not provided and not using s3vector
+ engine = AWSOS_Engine(parameters["engine"])
+ ef_construction = parameters.get("ef_construction")
+ ef_search = parameters.get("ef_search")
+ m = parameters.get("m")
+
+ # For non-s3vector engines, provide defaults if None
+ if engine != AWSOS_Engine.s3vector:
+ if ef_construction is None:
+ ef_construction = 200
+ if ef_search is None:
+ ef_search = 100
+ if m is None:
+ m = 16
+
  run(
  db=DB.AWSOpenSearch,
  db_config=AWSOpenSearchConfig(
@@ -144,12 +179,14 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
  force_merge_enabled=parameters["force_merge_enabled"],
  flush_threshold_size=parameters["flush_threshold_size"],
  index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
+ number_of_indexing_clients=parameters["number_of_indexing_clients"],
  cb_threshold=parameters["cb_threshold"],
- efConstruction=parameters["ef_construction"],
- efSearch=parameters["ef_runtime"],
- M=parameters["m"],
- engine=AWSOS_Engine(parameters["engine"]),
+ efConstruction=ef_construction,
+ ef_search=ef_search,
+ M=m,
+ engine=engine,
  quantization_type=AWSOSQuantization(parameters["quantization_type"]),
+ metric_type_name=parameters["metric_type"],
  ),
  **parameters,
  )
vectordb_bench/backend/clients/aws_opensearch/config.py

@@ -34,6 +34,7 @@ class AWSOpenSearchConfig(DBConfig, BaseModel):
  class AWSOS_Engine(Enum):
  faiss = "faiss"
  lucene = "lucene"
+ s3vector = "s3vector"


  class AWSOSQuantization(Enum):
@@ -44,11 +45,11 @@ class AWSOSQuantization(Enum):
  class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
  metric_type: MetricType = MetricType.L2
  engine: AWSOS_Engine = AWSOS_Engine.faiss
- efConstruction: int = 256
- efSearch: int = 100
+ efConstruction: int | None = 256
+ ef_search: int | None = 100
  engine_name: str | None = None
  metric_type_name: str | None = None
- M: int = 16
+ M: int | None = 16
  index_thread_qty: int | None = 4
  number_of_shards: int | None = 1
  number_of_replicas: int | None = 0
@@ -91,6 +92,13 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):

  def parse_metric(self) -> str:
  log.info(f"User specified metric_type: {self.metric_type_name}")
+
+ # Handle None or empty metric_type_name
+ if self.metric_type_name is None or self.metric_type_name == "":
+ log.info("No metric_type_name specified, defaulting to l2")
+ self.metric_type = MetricType.L2
+ return "l2"
+
  self.metric_type = MetricType[self.metric_type_name.upper()]
  if self.metric_type == MetricType.IP:
  return "innerproduct"
@@ -108,20 +116,28 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
  def index_param(self) -> dict:
  log.info(f"Using engine: {self.engine} for index creation")
  log.info(f"Using metric_type: {self.metric_type_name} for index creation")
- log.info(f"Resulting space_type: {self.parse_metric()} for index creation")
+ space_type = self.parse_metric()
+ log.info(f"Resulting space_type: {space_type} for index creation")
+
+ # Handle s3vector engine with simplified configuration
+ # For s3vector, space_type should be set at the vector field level, not in method
+ if self.engine == AWSOS_Engine.s3vector:
+ return {"engine": "s3vector"}

  parameters = {"ef_construction": self.efConstruction, "m": self.M}

- if self.engine == AWSOS_Engine.faiss and self.faiss_use_fp16:
+ if self.engine == AWSOS_Engine.faiss and self.quantization_type == AWSOSQuantization.fp16:
  parameters["encoder"] = {"name": "sq", "parameters": {"type": "fp16"}}

+ # For other engines (faiss, lucene), space_type is set at method level
  return {
  "name": "hnsw",
  "engine": self.engine.value,
+ "space_type": space_type,
  "parameters": {
  "ef_construction": self.efConstruction,
  "m": self.M,
- "ef_search": self.efSearch,
+ "ef_search": self.ef_search,
  **(
  {"encoder": {"name": "sq", "parameters": {"type": self.quantization_type.fp16.value}}}
  if self.use_quant
@@ -131,4 +147,8 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
  }

  def search_param(self) -> dict:
- return {"ef_search": self.efSearch}
+ # s3vector engine doesn't use ef_search parameter
+ if self.engine == AWSOS_Engine.s3vector:
+ return {}
+
+ return {"ef_search": self.ef_search}
vectordb_bench/backend/clients/hologres/cli.py

@@ -0,0 +1,50 @@
+ from typing import Annotated, Unpack
+
+ import click
+ from pydantic import SecretStr
+
+ from vectordb_bench.backend.clients import DB
+ from vectordb_bench.cli.cli import (
+     CommonTypedDict,
+     HNSWFlavor5,
+     cli,
+     click_parameter_decorators_from_typed_dict,
+     run,
+ )
+
+
+ class HologresTypedDict(CommonTypedDict):
+     host: Annotated[str, click.option("--host", type=str, help="Hologres host", required=True)]
+     user: Annotated[str, click.option("--user", type=str, help="Hologres username", required=True)]
+     password: Annotated[str, click.option("--password", type=str, help="Hologres password", required=True)]
+     database: Annotated[str, click.option("--database", type=str, help="Hologres database name", required=True)]
+     port: Annotated[int, click.option("--port", type=int, help="Hologres port", required=True)]
+
+
+ class HologresHGraphTypedDict(CommonTypedDict, HologresTypedDict, HNSWFlavor5): ...
+
+
+ @cli.command()
+ @click_parameter_decorators_from_typed_dict(HologresHGraphTypedDict)
+ def HologresHGraph(**parameters: Unpack[HologresHGraphTypedDict]):
+     from .config import HologresConfig, HologresIndexConfig
+
+     run(
+         db=DB.Hologres,
+         db_config=HologresConfig(
+             db_label=parameters["db_label"],
+             user_name=SecretStr(parameters["user"]),
+             password=SecretStr(parameters["password"]),
+             host=parameters["host"],
+             port=parameters["port"],
+             db_name=parameters["database"],
+         ),
+         db_case_config=HologresIndexConfig(
+             index=parameters["index_type"],
+             max_degree=parameters["m"],
+             ef_construction=parameters["ef_construction"],
+             ef_search=parameters["ef_search"],
+             use_reorder=parameters["use_reorder"],
+         ),
+         **parameters,
+     )
vectordb_bench/backend/clients/hologres/config.py

@@ -0,0 +1,121 @@
+ from pydantic import BaseModel, SecretStr
+
+ from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
+
+
+ class HologresConfig(DBConfig):
+     user_name: SecretStr = SecretStr("hologres")
+     password: SecretStr
+     host: str = "localhost"
+     port: int = 5432
+     db_name: str
+
+     def to_dict(self) -> dict:
+         user_str = self.user_name.get_secret_value()
+         pwd_str = self.password.get_secret_value()
+         return {
+             "host": self.host,
+             "port": self.port,
+             "dbname": self.db_name,
+             "user": user_str,
+             "password": pwd_str,
+         }
+
+
+ class HologresIndexConfig(BaseModel, DBCaseConfig):
+     index: IndexType = IndexType.Hologres_HGraph
+     metric_type: MetricType | None = None
+
+     create_index_before_load: bool = False
+     create_index_after_load: bool = True
+
+     min_flush_proxima_row_count: int = 1000
+     min_compaction_proxima_row_count: int = 1000
+     max_total_size_to_merge_mb: int = 4096
+     full_compact_max_file_size_mb: int = 4096
+
+     base_quantization_type: str = "sq8_uniform"
+     precise_quantization_type: str = "fp32"
+     use_reorder: bool = True
+     build_thread_count: int = 16
+     max_degree: int = 64
+     ef_construction: int = 400
+
+     ef_search: int = 51
+
+     def index_param(self) -> dict:
+         return {
+             "algorithm": self.algorithm(),
+             "distance_method": self.distance_method(),
+             "builder_params": self.builder_params(),
+             "full_compact_max_file_size_mb": self.full_compact_max_file_size_mb,
+         }
+
+     def search_param(self) -> dict:
+         return {
+             "distance_function": self.distance_function(),
+             "order_direction": self.order_direction(),
+             "searcher_params": self.searcher_params(),
+         }
+
+     def algorithm(self) -> str:
+         return self.index.value
+
+     def is_proxima(self) -> bool:
+         return self.index == IndexType.Hologres_Graph
+
+     def distance_method(self) -> str:
+         if self.metric_type == MetricType.L2:
+             if self.index == IndexType.Hologres_Graph:
+                 return "SquaredEuclidean"
+             return "Euclidean"
+         if self.metric_type == MetricType.IP:
+             return "InnerProduct"
+         if self.metric_type == MetricType.COSINE:
+             if self.index == IndexType.Hologres_Graph:
+                 return "InnerProduct"
+             return "Cosine"
+         return "Euclidean"
+
+     def distance_function(self) -> str:
+         if self.metric_type == MetricType.L2:
+             if self.index == IndexType.Hologres_Graph:
+                 return "approx_squared_euclidean_distance"
+             return "approx_euclidean_distance"
+         if self.metric_type == MetricType.IP:
+             return "approx_inner_product_distance"
+         if self.metric_type == MetricType.COSINE:
+             if self.index == IndexType.Hologres_Graph:
+                 return "approx_inner_product_distance"
+             return "approx_cosine_distance"
+         return "approx_euclidean_distance"
+
+     def order_direction(self) -> str:
+         if self.metric_type == MetricType.L2:
+             return "ASC"
+         if self.metric_type in {MetricType.IP, MetricType.COSINE}:
+             return "DESC"
+         return "ASC"
+
+     def builder_params(self) -> dict:
+         if self.use_reorder:
+             self.base_quantization_type = "sq8_uniform"
+         else:
+             self.base_quantization_type = "fp32"
+
+         return {
+             "min_flush_proxima_row_count": self.min_flush_proxima_row_count,
+             "min_compaction_proxima_row_count": self.min_compaction_proxima_row_count,
+             "max_total_size_to_merge_mb": self.max_total_size_to_merge_mb,
+             "build_thread_count": self.build_thread_count,
+             "base_quantization_type": self.base_quantization_type,
+             "max_degree": self.max_degree,
+             "ef_construction": self.ef_construction,
+             "precise_quantization_type": self.precise_quantization_type,
+             "use_reorder": self.use_reorder,
+         }
+
+     def searcher_params(self) -> dict:
+         return {
+             "ef_search": self.ef_search,
+         }
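A hedged sketch of how the new config maps an L2 metric onto Hologres parameters, assuming the module path shown in the file list above and the defaults in this file (HGraph index, ef_search=51):

# Illustration only: exercising the mapping methods defined above.
from vectordb_bench.backend.clients.api import MetricType
from vectordb_bench.backend.clients.hologres.config import HologresIndexConfig

cfg = HologresIndexConfig(metric_type=MetricType.L2)

print(cfg.index_param()["distance_method"])     # -> "Euclidean"
print(cfg.search_param()["distance_function"])  # -> "approx_euclidean_distance"
print(cfg.search_param()["order_direction"])    # -> "ASC"
print(cfg.search_param()["searcher_params"])    # -> {"ef_search": 51}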