vectordb-bench 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. vectordb_bench/backend/clients/__init__.py +4 -4
  2. vectordb_bench/backend/clients/api.py +1 -0
  3. vectordb_bench/backend/clients/chroma/chroma.py +2 -14
  4. vectordb_bench/backend/clients/milvus/config.py +19 -0
  5. vectordb_bench/backend/clients/pgvecto_rs/config.py +44 -32
  6. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +16 -16
  7. vectordb_bench/backend/clients/pgvector/config.py +63 -12
  8. vectordb_bench/backend/clients/pgvector/pgvector.py +105 -77
  9. vectordb_bench/backend/clients/qdrant_cloud/config.py +19 -6
  10. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -7
  11. vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -0
  12. vectordb_bench/backend/data_source.py +13 -64
  13. vectordb_bench/backend/dataset.py +45 -67
  14. vectordb_bench/backend/runner/serial_runner.py +1 -1
  15. vectordb_bench/backend/task_runner.py +2 -2
  16. vectordb_bench/backend/utils.py +30 -0
  17. vectordb_bench/frontend/components/run_test/caseSelector.py +1 -1
  18. vectordb_bench/frontend/const/dbCaseConfigs.py +41 -77
  19. vectordb_bench/models.py +1 -0
  20. vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +8 -0
  21. vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +9 -3
  22. vectordb_bench/results/ZillizCloud/{result_20240105_beta_202401_zillizcloud.json → result_20240105_standard_202401_zillizcloud.json} +365 -41
  23. vectordb_bench/results/getLeaderboardData.py +1 -1
  24. vectordb_bench/results/leaderboard.json +1 -1
  25. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/METADATA +15 -2
  26. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/RECORD +30 -30
  27. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/WHEEL +1 -1
  28. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/LICENSE +0 -0
  29. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/entry_points.txt +0 -0
  30. {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from .data_source import DatasetSource, DatasetReader
22
22
  log = logging.getLogger(__name__)
23
23
 
24
24
 
25
- SizeLabel = namedtuple('SizeLabel', ['size', 'label', 'files'])
25
+ SizeLabel = namedtuple('SizeLabel', ['size', 'label', 'file_count'])
26
26
 
27
27
 
28
28
  class BaseDataset(BaseModel):
@@ -31,6 +31,7 @@ class BaseDataset(BaseModel):
31
31
  dim: int
32
32
  metric_type: MetricType
33
33
  use_shuffled: bool
34
+ with_gt: bool = False
34
35
  _size_label: dict[int, SizeLabel] = PrivateAttr()
35
36
 
36
37
  @validator("size")
@@ -48,34 +49,8 @@ class BaseDataset(BaseModel):
48
49
  return f"{self.name}_{self.label}_{utils.numerize(self.size)}".lower()
49
50
 
50
51
  @property
51
- def files(self) -> str:
52
- return self._size_label.get(self.size).files
53
-
54
-
55
- def get_files(train_count: int, use_shuffled: bool, with_gt: bool = True) -> list[str]:
56
- prefix = "shuffle_train" if use_shuffled else "train"
57
- middle = f"of-{train_count}"
58
- surfix = "parquet"
59
-
60
- train_files = []
61
- if train_count > 1:
62
- just_size = len(str(train_count))
63
- for i in range(train_count):
64
- sub_file = f"{prefix}-{str(i).rjust(just_size, '0')}-{middle}.{surfix}"
65
- train_files.append(sub_file)
66
- else:
67
- train_files.append(f"{prefix}.{surfix}")
68
-
69
- files = ['test.parquet']
70
- if with_gt:
71
- files.extend([
72
- 'neighbors.parquet',
73
- 'neighbors_tail_1p.parquet',
74
- 'neighbors_head_1p.parquet',
75
- ])
76
-
77
- files.extend(train_files)
78
- return files
52
+ def file_count(self) -> int:
53
+ return self._size_label.get(self.size).file_count
79
54
 
80
55
 
81
56
  class LAION(BaseDataset):
@@ -83,8 +58,9 @@ class LAION(BaseDataset):
83
58
  dim: int = 768
84
59
  metric_type: MetricType = MetricType.L2
85
60
  use_shuffled: bool = False
61
+ with_gt: bool = True
86
62
  _size_label: dict = {
87
- 100_000_000: SizeLabel(100_000_000, "LARGE", get_files(100, False)),
63
+ 100_000_000: SizeLabel(100_000_000, "LARGE", 100),
88
64
  }
89
65
 
90
66
 
@@ -94,8 +70,8 @@ class GIST(BaseDataset):
94
70
  metric_type: MetricType = MetricType.L2
95
71
  use_shuffled: bool = False
96
72
  _size_label: dict = {
97
- 100_000: SizeLabel(100_000, "SMALL", get_files(1, False, False)),
98
- 1_000_000: SizeLabel(1_000_000, "MEDIUM", get_files(1, False, False)),
73
+ 100_000: SizeLabel(100_000, "SMALL", 1),
74
+ 1_000_000: SizeLabel(1_000_000, "MEDIUM", 1),
99
75
  }
100
76
 
101
77
 
@@ -104,10 +80,11 @@ class Cohere(BaseDataset):
104
80
  dim: int = 768
105
81
  metric_type: MetricType = MetricType.COSINE
106
82
  use_shuffled: bool = config.USE_SHUFFLED_DATA
83
+ with_gt: bool = True,
107
84
  _size_label: dict = {
108
- 100_000: SizeLabel(100_000, "SMALL", get_files(1, config.USE_SHUFFLED_DATA)),
109
- 1_000_000: SizeLabel(1_000_000, "MEDIUM", get_files(1, config.USE_SHUFFLED_DATA)),
110
- 10_000_000: SizeLabel(10_000_000, "LARGE", get_files(10, config.USE_SHUFFLED_DATA)),
85
+ 100_000: SizeLabel(100_000, "SMALL", 1),
86
+ 1_000_000: SizeLabel(1_000_000, "MEDIUM", 1),
87
+ 10_000_000: SizeLabel(10_000_000, "LARGE", 10),
111
88
  }
112
89
 
113
90
 
@@ -116,7 +93,7 @@ class Glove(BaseDataset):
116
93
  dim: int = 200
117
94
  metric_type: MetricType = MetricType.COSINE
118
95
  use_shuffled: bool = False
119
- _size_label: dict = {1_000_000: SizeLabel(1_000_000, "MEDIUM", get_files(1, False, False))}
96
+ _size_label: dict = {1_000_000: SizeLabel(1_000_000, "MEDIUM", 1)}
120
97
 
121
98
 
122
99
  class SIFT(BaseDataset):
@@ -125,9 +102,9 @@ class SIFT(BaseDataset):
125
102
  metric_type: MetricType = MetricType.L2
126
103
  use_shuffled: bool = False
127
104
  _size_label: dict = {
128
- 500_000: SizeLabel(500_000, "SMALL", get_files(1, False, False)),
129
- 5_000_000: SizeLabel(5_000_000, "MEDIUM", get_files(1, False, False)),
130
- # 50_000_000: SizeLabel(50_000_000, "LARGE", get_files(50, False, False)),
105
+ 500_000: SizeLabel(500_000, "SMALL", 1,),
106
+ 5_000_000: SizeLabel(5_000_000, "MEDIUM", 1),
107
+ # 50_000_000: SizeLabel(50_000_000, "LARGE", 50),
131
108
  }
132
109
 
133
110
 
@@ -136,10 +113,11 @@ class OpenAI(BaseDataset):
136
113
  dim: int = 1536
137
114
  metric_type: MetricType = MetricType.COSINE
138
115
  use_shuffled: bool = config.USE_SHUFFLED_DATA
116
+ with_gt: bool = True,
139
117
  _size_label: dict = {
140
- 50_000: SizeLabel(50_000, "SMALL", get_files(1, config.USE_SHUFFLED_DATA)),
141
- 500_000: SizeLabel(500_000, "MEDIUM", get_files(1, config.USE_SHUFFLED_DATA)),
142
- 5_000_000: SizeLabel(5_000_000, "LARGE", get_files(10, config.USE_SHUFFLED_DATA)),
118
+ 50_000: SizeLabel(50_000, "SMALL", 1),
119
+ 500_000: SizeLabel(500_000, "MEDIUM", 1),
120
+ 5_000_000: SizeLabel(5_000_000, "LARGE", 10),
143
121
  }
144
122
 
145
123
 
@@ -155,6 +133,7 @@ class DatasetManager(BaseModel):
155
133
  """
156
134
  data: BaseDataset
157
135
  test_data: pd.DataFrame | None = None
136
+ gt_data: pd.DataFrame | None = None
158
137
  train_files : list[str] = []
159
138
  reader: DatasetReader | None = None
160
139
 
@@ -180,49 +159,48 @@ class DatasetManager(BaseModel):
180
159
  def __iter__(self):
181
160
  return DataSetIterator(self)
182
161
 
183
- def prepare(self, source: DatasetSource=DatasetSource.S3, check: bool=True) -> bool:
162
+ # TODO passing use_shuffle from outside
163
+ def prepare(self,
164
+ source: DatasetSource=DatasetSource.S3,
165
+ filters: int | float | str | None = None,
166
+ ) -> bool:
184
167
  """Download the dataset from DatasetSource
185
168
  url = f"{source}/{self.data.dir_name}"
186
169
 
187
- download files from url to self.data_dir, there'll be 4 types of files in the data_dir
188
- - train*.parquet: for training
189
- - test.parquet: for testing
190
- - neighbors.parquet: ground_truth of the test.parquet
191
- - neighbors_head_1p.parquet: ground_truth of the test.parquet after filtering 1% data
192
- - neighbors_99p.parquet: ground_truth of the test.parquet after filtering 99% data
193
-
194
170
  Args:
195
171
  source(DatasetSource): S3 or AliyunOSS, default as S3
196
- check(bool): Whether to do etags check
172
+ filters(Optional[int | float | str]): combined with dataset's with_gt to
173
+ compose the correct ground_truth file
197
174
 
198
175
  Returns:
199
176
  bool: whether the dataset is successfully prepared
200
177
 
201
178
  """
179
+ file_count, use_shuffled = self.data.file_count, self.data.use_shuffled
180
+
181
+ train_files = utils.compose_train_files(file_count, use_shuffled)
182
+ all_files = train_files
183
+
184
+ gt_file, test_file = None, None
185
+ if self.data.with_gt:
186
+ gt_file, test_file = utils.compose_gt_file(filters), "test.parquet"
187
+ all_files.extend([gt_file, test_file])
188
+
202
189
  source.reader().read(
203
190
  dataset=self.data.dir_name.lower(),
204
- files=self.data.files,
191
+ files=all_files,
205
192
  local_ds_root=self.data_dir,
206
193
  )
207
194
 
208
- prefix = "shuffle_train" if self.data.use_shuffled else "train"
195
+ if gt_file is not None and test_file is not None:
196
+ self.test_data = self._read_file(test_file)
197
+ self.gt_data = self._read_file(gt_file)
198
+
199
+ prefix = "shuffle_train" if use_shuffled else "train"
209
200
  self.train_files = sorted([f.name for f in self.data_dir.glob(f'{prefix}*.parquet')])
210
201
  log.debug(f"{self.data.name}: available train files {self.train_files}")
211
- self.test_data = self._read_file("test.parquet")
212
- return True
213
202
 
214
- def get_ground_truth(self, filters: int | float | None = None) -> pd.DataFrame:
215
-
216
- file_name = ""
217
- if filters is None:
218
- file_name = "neighbors.parquet"
219
- elif filters == 0.01:
220
- file_name = "neighbors_head_1p.parquet"
221
- elif filters == 0.99:
222
- file_name = "neighbors_tail_1p.parquet"
223
- else:
224
- raise ValueError(f"Filters not supported: {filters}")
225
- return self._read_file(file_name)
203
+ return True
226
204
 
227
205
  def _read_file(self, file_name: str) -> pd.DataFrame:
228
206
  """read one file from disk into memory"""
@@ -40,7 +40,7 @@ class SerialInsertRunner:
40
40
  emb_np = np.stack(data_df['emb'])
41
41
  if self.normalize:
42
42
  log.debug("normalize the 100k train data")
43
- all_embeddings = emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis].tolist()
43
+ all_embeddings = (emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis]).tolist()
44
44
  else:
45
45
  all_embeddings = emb_np.tolist()
46
46
  del(emb_np)
@@ -84,7 +84,7 @@ class CaseRunner(BaseModel):
84
84
  def _pre_run(self, drop_old: bool = True):
85
85
  try:
86
86
  self.init_db(drop_old)
87
- self.ca.dataset.prepare(self.dataset_source)
87
+ self.ca.dataset.prepare(self.dataset_source, filters=self.ca.filter_rate)
88
88
  except ModuleNotFoundError as e:
89
89
  log.warning(f"pre run case error: please install client for db: {self.config.db}, error={e}")
90
90
  raise e from None
@@ -215,7 +215,7 @@ class CaseRunner(BaseModel):
215
215
  test_emb = test_emb / np.linalg.norm(test_emb, axis=1)[:, np.newaxis]
216
216
  self.test_emb = test_emb.tolist()
217
217
 
218
- gt_df = self.ca.dataset.get_ground_truth(self.ca.filter_rate)
218
+ gt_df = self.ca.dataset.gt_data
219
219
 
220
220
  self.serial_search_runner = SerialSearchRunner(
221
221
  db=self.db,
@@ -42,3 +42,33 @@ def time_it(func):
42
42
  delta = time.perf_counter() - pref
43
43
  return result, delta
44
44
  return inner
45
+
46
+
47
+ def compose_train_files(train_count: int, use_shuffled: bool) -> list[str]:
48
+ prefix = "shuffle_train" if use_shuffled else "train"
49
+ middle = f"of-{train_count}"
50
+ surfix = "parquet"
51
+
52
+ train_files = []
53
+ if train_count > 1:
54
+ just_size = 2
55
+ for i in range(train_count):
56
+ sub_file = f"{prefix}-{str(i).rjust(just_size, '0')}-{middle}.{surfix}"
57
+ train_files.append(sub_file)
58
+ else:
59
+ train_files.append(f"{prefix}.{surfix}")
60
+
61
+ return train_files
62
+
63
+
64
+ def compose_gt_file(filters: int | float | str | None = None) -> str:
65
+ if filters is None:
66
+ return "neighbors.parquet"
67
+
68
+ if filters == 0.01:
69
+ return "neighbors_head_1p.parquet"
70
+
71
+ if filters == 0.99:
72
+ return "neighbors_tail_1p.parquet"
73
+
74
+ raise ValueError(f"Filters not supported: {filters}")
@@ -59,7 +59,7 @@ def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
59
59
  )
60
60
  caseConfig = allCaseConfigs[db][case]
61
61
  k = 0
62
- for config in CASE_CONFIG_MAP.get(db, {}).get(case, []):
62
+ for config in CASE_CONFIG_MAP.get(db, {}).get(case.case_cls().label, []):
63
63
  if config.isDisplayed(caseConfig):
64
64
  column = columns[1 + k % CASE_CONFIG_SETTING_COLUMNS]
65
65
  key = "%s-%s-%s" % (db, case, config.label.value)
@@ -1,7 +1,7 @@
1
1
  from enum import IntEnum
2
2
  import typing
3
3
  from pydantic import BaseModel
4
- from vectordb_bench.backend.cases import CaseType
4
+ from vectordb_bench.backend.cases import CaseLabel, CaseType
5
5
  from vectordb_bench.backend.clients import DB
6
6
  from vectordb_bench.backend.clients.api import IndexType
7
7
 
@@ -60,6 +60,7 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
60
60
  "options": [
61
61
  IndexType.HNSW.value,
62
62
  IndexType.IVFFlat.value,
63
+ IndexType.IVFSQ8.value,
63
64
  IndexType.DISKANN.value,
64
65
  IndexType.Flat.value,
65
66
  IndexType.AUTOINDEX.value,
@@ -197,6 +198,7 @@ CaseConfigParamInput_Nlist = CaseConfigInput(
197
198
  isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
198
199
  in [
199
200
  IndexType.IVFFlat.value,
201
+ IndexType.IVFSQ8.value,
200
202
  IndexType.GPU_IVF_FLAT.value,
201
203
  IndexType.GPU_IVF_PQ.value,
202
204
  ],
@@ -213,6 +215,7 @@ CaseConfigParamInput_Nprobe = CaseConfigInput(
213
215
  isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
214
216
  in [
215
217
  IndexType.IVFFlat.value,
218
+ IndexType.IVFSQ8.value,
216
219
  IndexType.GPU_IVF_FLAT.value,
217
220
  IndexType.GPU_IVF_PQ.value,
218
221
  ],
@@ -394,6 +397,11 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
394
397
  inputConfig={
395
398
  "options": ["trivial", "scalar", "product"],
396
399
  },
400
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
401
+ in [
402
+ IndexType.HNSW.value,
403
+ IndexType.IVFFlat.value,
404
+ ],
397
405
  )
398
406
 
399
407
  CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
@@ -403,7 +411,21 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
403
411
  "options": ["x4", "x8", "x16", "x32", "x64"],
404
412
  },
405
413
  isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
406
- == "product",
414
+ == "product" and config.get(CaseConfigParamType.IndexType, None)
415
+ in [
416
+ IndexType.HNSW.value,
417
+ IndexType.IVFFlat.value,
418
+ ],
419
+ )
420
+
421
+ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
422
+ label=CaseConfigParamType.level,
423
+ inputType=InputType.Number,
424
+ inputConfig={
425
+ "min": 1,
426
+ "max": 3,
427
+ "value": 1,
428
+ },
407
429
  )
408
430
 
409
431
  MilvusLoadConfig = [
@@ -479,90 +501,32 @@ PgVectoRSPerformanceConfig = [
479
501
  CaseConfigParamInput_QuantizationRatio_PgVectoRS,
480
502
  ]
481
503
 
504
+ ZillizCloudPerformanceConfig = [
505
+ CaseConfigParamInput_ZillizLevel,
506
+ ]
507
+
482
508
  CASE_CONFIG_MAP = {
483
509
  DB.Milvus: {
484
- CaseType.CapacityDim960: MilvusLoadConfig,
485
- CaseType.CapacityDim128: MilvusLoadConfig,
486
- CaseType.Performance768D100M: MilvusPerformanceConfig,
487
- CaseType.Performance768D10M: MilvusPerformanceConfig,
488
- CaseType.Performance768D1M: MilvusPerformanceConfig,
489
- CaseType.Performance768D10M1P: MilvusPerformanceConfig,
490
- CaseType.Performance768D1M1P: MilvusPerformanceConfig,
491
- CaseType.Performance768D10M99P: MilvusPerformanceConfig,
492
- CaseType.Performance768D1M99P: MilvusPerformanceConfig,
493
- CaseType.Performance1536D5M: MilvusPerformanceConfig,
494
- CaseType.Performance1536D500K: MilvusPerformanceConfig,
495
- CaseType.Performance1536D5M1P: MilvusPerformanceConfig,
496
- CaseType.Performance1536D500K1P: MilvusPerformanceConfig,
497
- CaseType.Performance1536D5M99P: MilvusPerformanceConfig,
498
- CaseType.Performance1536D500K99P: MilvusPerformanceConfig,
510
+ CaseLabel.Load: MilvusLoadConfig,
511
+ CaseLabel.Performance: MilvusPerformanceConfig,
512
+ },
513
+ DB.ZillizCloud: {
514
+ CaseLabel.Performance: ZillizCloudPerformanceConfig,
499
515
  },
500
516
  DB.WeaviateCloud: {
501
- CaseType.CapacityDim960: WeaviateLoadConfig,
502
- CaseType.CapacityDim128: WeaviateLoadConfig,
503
- CaseType.Performance768D100M: WeaviatePerformanceConfig,
504
- CaseType.Performance768D10M: WeaviatePerformanceConfig,
505
- CaseType.Performance768D1M: WeaviatePerformanceConfig,
506
- CaseType.Performance768D10M1P: WeaviatePerformanceConfig,
507
- CaseType.Performance768D1M1P: WeaviatePerformanceConfig,
508
- CaseType.Performance768D10M99P: WeaviatePerformanceConfig,
509
- CaseType.Performance768D1M99P: WeaviatePerformanceConfig,
510
- CaseType.Performance1536D5M: WeaviatePerformanceConfig,
511
- CaseType.Performance1536D500K: WeaviatePerformanceConfig,
512
- CaseType.Performance1536D5M1P: WeaviatePerformanceConfig,
513
- CaseType.Performance1536D500K1P: WeaviatePerformanceConfig,
514
- CaseType.Performance1536D5M99P: WeaviatePerformanceConfig,
515
- CaseType.Performance1536D500K99P: WeaviatePerformanceConfig,
517
+ CaseLabel.Load: WeaviateLoadConfig,
518
+ CaseLabel.Performance: WeaviatePerformanceConfig,
516
519
  },
517
520
  DB.ElasticCloud: {
518
- CaseType.CapacityDim960: ESLoadingConfig,
519
- CaseType.CapacityDim128: ESLoadingConfig,
520
- CaseType.Performance768D100M: ESPerformanceConfig,
521
- CaseType.Performance768D10M: ESPerformanceConfig,
522
- CaseType.Performance768D1M: ESPerformanceConfig,
523
- CaseType.Performance768D10M1P: ESPerformanceConfig,
524
- CaseType.Performance768D1M1P: ESPerformanceConfig,
525
- CaseType.Performance768D10M99P: ESPerformanceConfig,
526
- CaseType.Performance768D1M99P: ESPerformanceConfig,
527
- CaseType.Performance1536D5M: ESPerformanceConfig,
528
- CaseType.Performance1536D500K: ESPerformanceConfig,
529
- CaseType.Performance1536D5M1P: ESPerformanceConfig,
530
- CaseType.Performance1536D500K1P: ESPerformanceConfig,
531
- CaseType.Performance1536D5M99P: ESPerformanceConfig,
532
- CaseType.Performance1536D500K99P: ESPerformanceConfig,
521
+ CaseLabel.Load: ESLoadingConfig,
522
+ CaseLabel.Performance: ESPerformanceConfig,
533
523
  },
534
524
  DB.PgVector: {
535
- CaseType.CapacityDim960: PgVectorLoadingConfig,
536
- CaseType.CapacityDim128: PgVectorLoadingConfig,
537
- CaseType.Performance768D100M: PgVectorPerformanceConfig,
538
- CaseType.Performance768D10M: PgVectorPerformanceConfig,
539
- CaseType.Performance768D1M: PgVectorPerformanceConfig,
540
- CaseType.Performance768D10M1P: PgVectorPerformanceConfig,
541
- CaseType.Performance768D1M1P: PgVectorPerformanceConfig,
542
- CaseType.Performance768D10M99P: PgVectorPerformanceConfig,
543
- CaseType.Performance768D1M99P: PgVectorPerformanceConfig,
544
- CaseType.Performance1536D5M: PgVectorPerformanceConfig,
545
- CaseType.Performance1536D500K: PgVectorPerformanceConfig,
546
- CaseType.Performance1536D5M1P: PgVectorPerformanceConfig,
547
- CaseType.Performance1536D500K1P: PgVectorPerformanceConfig,
548
- CaseType.Performance1536D5M99P: PgVectorPerformanceConfig,
549
- CaseType.Performance1536D500K99P: PgVectorPerformanceConfig,
525
+ CaseLabel.Load: PgVectorLoadingConfig,
526
+ CaseLabel.Performance: PgVectorPerformanceConfig,
550
527
  },
551
528
  DB.PgVectoRS: {
552
- CaseType.CapacityDim960: PgVectoRSLoadingConfig,
553
- CaseType.CapacityDim128: PgVectoRSLoadingConfig,
554
- CaseType.Performance768D100M: PgVectoRSPerformanceConfig,
555
- CaseType.Performance768D10M: PgVectoRSPerformanceConfig,
556
- CaseType.Performance768D1M: PgVectoRSPerformanceConfig,
557
- CaseType.Performance768D10M1P: PgVectoRSPerformanceConfig,
558
- CaseType.Performance768D1M1P: PgVectoRSPerformanceConfig,
559
- CaseType.Performance768D10M99P: PgVectoRSPerformanceConfig,
560
- CaseType.Performance768D1M99P: PgVectoRSPerformanceConfig,
561
- CaseType.Performance1536D5M: PgVectoRSPerformanceConfig,
562
- CaseType.Performance1536D500K: PgVectoRSPerformanceConfig,
563
- CaseType.Performance1536D5M1P: PgVectoRSPerformanceConfig,
564
- CaseType.Performance1536D500K1P: PgVectoRSPerformanceConfig,
565
- CaseType.Performance1536D5M99P: PgVectorPerformanceConfig,
566
- CaseType.Performance1536D500K99P: PgVectoRSPerformanceConfig,
529
+ CaseLabel.Load: PgVectoRSLoadingConfig,
530
+ CaseLabel.Performance: PgVectoRSPerformanceConfig,
567
531
  },
568
532
  }
vectordb_bench/models.py CHANGED
@@ -59,6 +59,7 @@ class CaseConfigParamType(Enum):
59
59
  build_algo = "build_algo"
60
60
  cache_dataset_on_device = "cache_dataset_on_device"
61
61
  refine_ratio = "refine_ratio"
62
+ level = "level"
62
63
 
63
64
 
64
65
  class CustomizedCase(BaseModel):
@@ -20,6 +20,7 @@
20
20
  "db_name": "**********"
21
21
  },
22
22
  "db_case_config": {
23
+ "index": "IVF_FLAT",
23
24
  "metric_type": "L2",
24
25
  "lists": 10,
25
26
  "probes": 2
@@ -49,6 +50,7 @@
49
50
  "db_name": "**********"
50
51
  },
51
52
  "db_case_config": {
53
+ "index": "IVF_FLAT",
52
54
  "metric_type": "L2",
53
55
  "lists": 10,
54
56
  "probes": 2
@@ -78,6 +80,7 @@
78
80
  "db_name": "**********"
79
81
  },
80
82
  "db_case_config": {
83
+ "index": "IVF_FLAT",
81
84
  "metric_type": "COSINE",
82
85
  "lists": 10,
83
86
  "probes": 2
@@ -107,6 +110,7 @@
107
110
  "db_name": "**********"
108
111
  },
109
112
  "db_case_config": {
113
+ "index": "IVF_FLAT",
110
114
  "metric_type": "COSINE",
111
115
  "lists": 10,
112
116
  "probes": 2
@@ -136,6 +140,7 @@
136
140
  "db_name": "**********"
137
141
  },
138
142
  "db_case_config": {
143
+ "index": "IVF_FLAT",
139
144
  "metric_type": "COSINE",
140
145
  "lists": 10,
141
146
  "probes": 2
@@ -165,6 +170,7 @@
165
170
  "db_name": "**********"
166
171
  },
167
172
  "db_case_config": {
173
+ "index": "IVF_FLAT",
168
174
  "metric_type": "COSINE",
169
175
  "lists": 10,
170
176
  "probes": 2
@@ -194,6 +200,7 @@
194
200
  "db_name": "**********"
195
201
  },
196
202
  "db_case_config": {
203
+ "index": "IVF_FLAT",
197
204
  "metric_type": "COSINE",
198
205
  "lists": 10,
199
206
  "probes": 2
@@ -223,6 +230,7 @@
223
230
  "db_name": "**********"
224
231
  },
225
232
  "db_case_config": {
233
+ "index": "IVF_FLAT",
226
234
  "metric_type": "COSINE",
227
235
  "lists": 10,
228
236
  "probes": 2
@@ -20,6 +20,7 @@
20
20
  "db_name": "**********"
21
21
  },
22
22
  "db_case_config": {
23
+ "index": "IVF_FLAT",
23
24
  "metric_type": "L2",
24
25
  "lists": 10,
25
26
  "probes": 2
@@ -51,7 +52,8 @@
51
52
  "db_case_config": {
52
53
  "metric_type": "L2",
53
54
  "lists": 10,
54
- "probes": 2
55
+ "probes": 2,
56
+ "index": "IVF_FLAT"
55
57
  },
56
58
  "case_config": {
57
59
  "case_id": 11,
@@ -80,7 +82,8 @@
80
82
  "db_case_config": {
81
83
  "metric_type": "L2",
82
84
  "lists": 10,
83
- "probes": 2
85
+ "probes": 2,
86
+ "index": "IVF_FLAT"
84
87
  },
85
88
  "case_config": {
86
89
  "case_id": 12,
@@ -107,6 +110,7 @@
107
110
  "db_name": "**********"
108
111
  },
109
112
  "db_case_config": {
113
+ "index": "IVF_FLAT",
110
114
  "metric_type": "L2",
111
115
  "lists": 10,
112
116
  "probes": 2
@@ -136,6 +140,7 @@
136
140
  "db_name": "**********"
137
141
  },
138
142
  "db_case_config": {
143
+ "index": "IVF_FLAT",
139
144
  "metric_type": "L2",
140
145
  "lists": 10,
141
146
  "probes": 2
@@ -165,6 +170,7 @@
165
170
  "db_name": "**********"
166
171
  },
167
172
  "db_case_config": {
173
+ "index": "IVF_FLAT",
168
174
  "metric_type": "L2",
169
175
  "lists": 10,
170
176
  "probes": 2
@@ -178,4 +184,4 @@
178
184
  }
179
185
  ],
180
186
  "file_fmt": "result_{}_{}_{}.json"
181
- }
187
+ }