vectordb-bench 0.0.6__py3-none-any.whl → 0.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/clients/__init__.py +4 -4
- vectordb_bench/backend/clients/api.py +1 -0
- vectordb_bench/backend/clients/chroma/chroma.py +2 -14
- vectordb_bench/backend/clients/milvus/config.py +19 -0
- vectordb_bench/backend/clients/pgvecto_rs/config.py +44 -32
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +16 -16
- vectordb_bench/backend/clients/pgvector/config.py +63 -12
- vectordb_bench/backend/clients/pgvector/pgvector.py +105 -77
- vectordb_bench/backend/clients/qdrant_cloud/config.py +19 -6
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +11 -7
- vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -0
- vectordb_bench/backend/data_source.py +13 -64
- vectordb_bench/backend/dataset.py +45 -67
- vectordb_bench/backend/runner/serial_runner.py +1 -1
- vectordb_bench/backend/task_runner.py +2 -2
- vectordb_bench/backend/utils.py +30 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +1 -1
- vectordb_bench/frontend/const/dbCaseConfigs.py +41 -77
- vectordb_bench/models.py +1 -0
- vectordb_bench/results/PgVector/result_20230727_standard_pgvector.json +8 -0
- vectordb_bench/results/PgVector/result_20230808_standard_pgvector.json +9 -3
- vectordb_bench/results/ZillizCloud/{result_20240105_beta_202401_zillizcloud.json → result_20240105_standard_202401_zillizcloud.json} +365 -41
- vectordb_bench/results/getLeaderboardData.py +1 -1
- vectordb_bench/results/leaderboard.json +1 -1
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/METADATA +15 -2
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/RECORD +30 -30
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/WHEEL +1 -1
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.6.dist-info → vectordb_bench-0.0.8.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from .data_source import DatasetSource, DatasetReader
|
|
22
22
|
log = logging.getLogger(__name__)
|
23
23
|
|
24
24
|
|
25
|
-
SizeLabel = namedtuple('SizeLabel', ['size', 'label', '
|
25
|
+
SizeLabel = namedtuple('SizeLabel', ['size', 'label', 'file_count'])
|
26
26
|
|
27
27
|
|
28
28
|
class BaseDataset(BaseModel):
|
@@ -31,6 +31,7 @@ class BaseDataset(BaseModel):
|
|
31
31
|
dim: int
|
32
32
|
metric_type: MetricType
|
33
33
|
use_shuffled: bool
|
34
|
+
with_gt: bool = False
|
34
35
|
_size_label: dict[int, SizeLabel] = PrivateAttr()
|
35
36
|
|
36
37
|
@validator("size")
|
@@ -48,34 +49,8 @@ class BaseDataset(BaseModel):
|
|
48
49
|
return f"{self.name}_{self.label}_{utils.numerize(self.size)}".lower()
|
49
50
|
|
50
51
|
@property
|
51
|
-
def
|
52
|
-
return self._size_label.get(self.size).
|
53
|
-
|
54
|
-
|
55
|
-
def get_files(train_count: int, use_shuffled: bool, with_gt: bool = True) -> list[str]:
|
56
|
-
prefix = "shuffle_train" if use_shuffled else "train"
|
57
|
-
middle = f"of-{train_count}"
|
58
|
-
surfix = "parquet"
|
59
|
-
|
60
|
-
train_files = []
|
61
|
-
if train_count > 1:
|
62
|
-
just_size = len(str(train_count))
|
63
|
-
for i in range(train_count):
|
64
|
-
sub_file = f"{prefix}-{str(i).rjust(just_size, '0')}-{middle}.{surfix}"
|
65
|
-
train_files.append(sub_file)
|
66
|
-
else:
|
67
|
-
train_files.append(f"{prefix}.{surfix}")
|
68
|
-
|
69
|
-
files = ['test.parquet']
|
70
|
-
if with_gt:
|
71
|
-
files.extend([
|
72
|
-
'neighbors.parquet',
|
73
|
-
'neighbors_tail_1p.parquet',
|
74
|
-
'neighbors_head_1p.parquet',
|
75
|
-
])
|
76
|
-
|
77
|
-
files.extend(train_files)
|
78
|
-
return files
|
52
|
+
def file_count(self) -> int:
|
53
|
+
return self._size_label.get(self.size).file_count
|
79
54
|
|
80
55
|
|
81
56
|
class LAION(BaseDataset):
|
@@ -83,8 +58,9 @@ class LAION(BaseDataset):
|
|
83
58
|
dim: int = 768
|
84
59
|
metric_type: MetricType = MetricType.L2
|
85
60
|
use_shuffled: bool = False
|
61
|
+
with_gt: bool = True
|
86
62
|
_size_label: dict = {
|
87
|
-
100_000_000: SizeLabel(100_000_000, "LARGE",
|
63
|
+
100_000_000: SizeLabel(100_000_000, "LARGE", 100),
|
88
64
|
}
|
89
65
|
|
90
66
|
|
@@ -94,8 +70,8 @@ class GIST(BaseDataset):
|
|
94
70
|
metric_type: MetricType = MetricType.L2
|
95
71
|
use_shuffled: bool = False
|
96
72
|
_size_label: dict = {
|
97
|
-
100_000: SizeLabel(100_000, "SMALL",
|
98
|
-
1_000_000: SizeLabel(1_000_000, "MEDIUM",
|
73
|
+
100_000: SizeLabel(100_000, "SMALL", 1),
|
74
|
+
1_000_000: SizeLabel(1_000_000, "MEDIUM", 1),
|
99
75
|
}
|
100
76
|
|
101
77
|
|
@@ -104,10 +80,11 @@ class Cohere(BaseDataset):
|
|
104
80
|
dim: int = 768
|
105
81
|
metric_type: MetricType = MetricType.COSINE
|
106
82
|
use_shuffled: bool = config.USE_SHUFFLED_DATA
|
83
|
+
with_gt: bool = True,
|
107
84
|
_size_label: dict = {
|
108
|
-
100_000: SizeLabel(100_000, "SMALL",
|
109
|
-
1_000_000: SizeLabel(1_000_000, "MEDIUM",
|
110
|
-
10_000_000: SizeLabel(10_000_000, "LARGE",
|
85
|
+
100_000: SizeLabel(100_000, "SMALL", 1),
|
86
|
+
1_000_000: SizeLabel(1_000_000, "MEDIUM", 1),
|
87
|
+
10_000_000: SizeLabel(10_000_000, "LARGE", 10),
|
111
88
|
}
|
112
89
|
|
113
90
|
|
@@ -116,7 +93,7 @@ class Glove(BaseDataset):
|
|
116
93
|
dim: int = 200
|
117
94
|
metric_type: MetricType = MetricType.COSINE
|
118
95
|
use_shuffled: bool = False
|
119
|
-
_size_label: dict = {1_000_000: SizeLabel(1_000_000, "MEDIUM",
|
96
|
+
_size_label: dict = {1_000_000: SizeLabel(1_000_000, "MEDIUM", 1)}
|
120
97
|
|
121
98
|
|
122
99
|
class SIFT(BaseDataset):
|
@@ -125,9 +102,9 @@ class SIFT(BaseDataset):
|
|
125
102
|
metric_type: MetricType = MetricType.L2
|
126
103
|
use_shuffled: bool = False
|
127
104
|
_size_label: dict = {
|
128
|
-
500_000: SizeLabel(500_000, "SMALL",
|
129
|
-
5_000_000: SizeLabel(5_000_000, "MEDIUM",
|
130
|
-
# 50_000_000: SizeLabel(50_000_000, "LARGE",
|
105
|
+
500_000: SizeLabel(500_000, "SMALL", 1,),
|
106
|
+
5_000_000: SizeLabel(5_000_000, "MEDIUM", 1),
|
107
|
+
# 50_000_000: SizeLabel(50_000_000, "LARGE", 50),
|
131
108
|
}
|
132
109
|
|
133
110
|
|
@@ -136,10 +113,11 @@ class OpenAI(BaseDataset):
|
|
136
113
|
dim: int = 1536
|
137
114
|
metric_type: MetricType = MetricType.COSINE
|
138
115
|
use_shuffled: bool = config.USE_SHUFFLED_DATA
|
116
|
+
with_gt: bool = True,
|
139
117
|
_size_label: dict = {
|
140
|
-
50_000: SizeLabel(50_000, "SMALL",
|
141
|
-
500_000: SizeLabel(500_000, "MEDIUM",
|
142
|
-
5_000_000: SizeLabel(5_000_000, "LARGE",
|
118
|
+
50_000: SizeLabel(50_000, "SMALL", 1),
|
119
|
+
500_000: SizeLabel(500_000, "MEDIUM", 1),
|
120
|
+
5_000_000: SizeLabel(5_000_000, "LARGE", 10),
|
143
121
|
}
|
144
122
|
|
145
123
|
|
@@ -155,6 +133,7 @@ class DatasetManager(BaseModel):
|
|
155
133
|
"""
|
156
134
|
data: BaseDataset
|
157
135
|
test_data: pd.DataFrame | None = None
|
136
|
+
gt_data: pd.DataFrame | None = None
|
158
137
|
train_files : list[str] = []
|
159
138
|
reader: DatasetReader | None = None
|
160
139
|
|
@@ -180,49 +159,48 @@ class DatasetManager(BaseModel):
|
|
180
159
|
def __iter__(self):
|
181
160
|
return DataSetIterator(self)
|
182
161
|
|
183
|
-
|
162
|
+
# TODO passing use_shuffle from outside
|
163
|
+
def prepare(self,
|
164
|
+
source: DatasetSource=DatasetSource.S3,
|
165
|
+
filters: int | float | str | None = None,
|
166
|
+
) -> bool:
|
184
167
|
"""Download the dataset from DatasetSource
|
185
168
|
url = f"{source}/{self.data.dir_name}"
|
186
169
|
|
187
|
-
download files from url to self.data_dir, there'll be 4 types of files in the data_dir
|
188
|
-
- train*.parquet: for training
|
189
|
-
- test.parquet: for testing
|
190
|
-
- neighbors.parquet: ground_truth of the test.parquet
|
191
|
-
- neighbors_head_1p.parquet: ground_truth of the test.parquet after filtering 1% data
|
192
|
-
- neighbors_99p.parquet: ground_truth of the test.parquet after filtering 99% data
|
193
|
-
|
194
170
|
Args:
|
195
171
|
source(DatasetSource): S3 or AliyunOSS, default as S3
|
196
|
-
|
172
|
+
filters(Optional[int | float | str]): combined with dataset's with_gt to
|
173
|
+
compose the correct ground_truth file
|
197
174
|
|
198
175
|
Returns:
|
199
176
|
bool: whether the dataset is successfully prepared
|
200
177
|
|
201
178
|
"""
|
179
|
+
file_count, use_shuffled = self.data.file_count, self.data.use_shuffled
|
180
|
+
|
181
|
+
train_files = utils.compose_train_files(file_count, use_shuffled)
|
182
|
+
all_files = train_files
|
183
|
+
|
184
|
+
gt_file, test_file = None, None
|
185
|
+
if self.data.with_gt:
|
186
|
+
gt_file, test_file = utils.compose_gt_file(filters), "test.parquet"
|
187
|
+
all_files.extend([gt_file, test_file])
|
188
|
+
|
202
189
|
source.reader().read(
|
203
190
|
dataset=self.data.dir_name.lower(),
|
204
|
-
files=
|
191
|
+
files=all_files,
|
205
192
|
local_ds_root=self.data_dir,
|
206
193
|
)
|
207
194
|
|
208
|
-
|
195
|
+
if gt_file is not None and test_file is not None:
|
196
|
+
self.test_data = self._read_file(test_file)
|
197
|
+
self.gt_data = self._read_file(gt_file)
|
198
|
+
|
199
|
+
prefix = "shuffle_train" if use_shuffled else "train"
|
209
200
|
self.train_files = sorted([f.name for f in self.data_dir.glob(f'{prefix}*.parquet')])
|
210
201
|
log.debug(f"{self.data.name}: available train files {self.train_files}")
|
211
|
-
self.test_data = self._read_file("test.parquet")
|
212
|
-
return True
|
213
202
|
|
214
|
-
|
215
|
-
|
216
|
-
file_name = ""
|
217
|
-
if filters is None:
|
218
|
-
file_name = "neighbors.parquet"
|
219
|
-
elif filters == 0.01:
|
220
|
-
file_name = "neighbors_head_1p.parquet"
|
221
|
-
elif filters == 0.99:
|
222
|
-
file_name = "neighbors_tail_1p.parquet"
|
223
|
-
else:
|
224
|
-
raise ValueError(f"Filters not supported: {filters}")
|
225
|
-
return self._read_file(file_name)
|
203
|
+
return True
|
226
204
|
|
227
205
|
def _read_file(self, file_name: str) -> pd.DataFrame:
|
228
206
|
"""read one file from disk into memory"""
|
@@ -40,7 +40,7 @@ class SerialInsertRunner:
|
|
40
40
|
emb_np = np.stack(data_df['emb'])
|
41
41
|
if self.normalize:
|
42
42
|
log.debug("normalize the 100k train data")
|
43
|
-
all_embeddings = emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis].tolist()
|
43
|
+
all_embeddings = (emb_np / np.linalg.norm(emb_np, axis=1)[:, np.newaxis]).tolist()
|
44
44
|
else:
|
45
45
|
all_embeddings = emb_np.tolist()
|
46
46
|
del(emb_np)
|
@@ -84,7 +84,7 @@ class CaseRunner(BaseModel):
|
|
84
84
|
def _pre_run(self, drop_old: bool = True):
|
85
85
|
try:
|
86
86
|
self.init_db(drop_old)
|
87
|
-
self.ca.dataset.prepare(self.dataset_source)
|
87
|
+
self.ca.dataset.prepare(self.dataset_source, filters=self.ca.filter_rate)
|
88
88
|
except ModuleNotFoundError as e:
|
89
89
|
log.warning(f"pre run case error: please install client for db: {self.config.db}, error={e}")
|
90
90
|
raise e from None
|
@@ -215,7 +215,7 @@ class CaseRunner(BaseModel):
|
|
215
215
|
test_emb = test_emb / np.linalg.norm(test_emb, axis=1)[:, np.newaxis]
|
216
216
|
self.test_emb = test_emb.tolist()
|
217
217
|
|
218
|
-
gt_df = self.ca.dataset.
|
218
|
+
gt_df = self.ca.dataset.gt_data
|
219
219
|
|
220
220
|
self.serial_search_runner = SerialSearchRunner(
|
221
221
|
db=self.db,
|
vectordb_bench/backend/utils.py
CHANGED
@@ -42,3 +42,33 @@ def time_it(func):
|
|
42
42
|
delta = time.perf_counter() - pref
|
43
43
|
return result, delta
|
44
44
|
return inner
|
45
|
+
|
46
|
+
|
47
|
+
def compose_train_files(train_count: int, use_shuffled: bool) -> list[str]:
|
48
|
+
prefix = "shuffle_train" if use_shuffled else "train"
|
49
|
+
middle = f"of-{train_count}"
|
50
|
+
surfix = "parquet"
|
51
|
+
|
52
|
+
train_files = []
|
53
|
+
if train_count > 1:
|
54
|
+
just_size = 2
|
55
|
+
for i in range(train_count):
|
56
|
+
sub_file = f"{prefix}-{str(i).rjust(just_size, '0')}-{middle}.{surfix}"
|
57
|
+
train_files.append(sub_file)
|
58
|
+
else:
|
59
|
+
train_files.append(f"{prefix}.{surfix}")
|
60
|
+
|
61
|
+
return train_files
|
62
|
+
|
63
|
+
|
64
|
+
def compose_gt_file(filters: int | float | str | None = None) -> str:
|
65
|
+
if filters is None:
|
66
|
+
return "neighbors.parquet"
|
67
|
+
|
68
|
+
if filters == 0.01:
|
69
|
+
return "neighbors_head_1p.parquet"
|
70
|
+
|
71
|
+
if filters == 0.99:
|
72
|
+
return "neighbors_tail_1p.parquet"
|
73
|
+
|
74
|
+
raise ValueError(f"Filters not supported: {filters}")
|
@@ -59,7 +59,7 @@ def caseConfigSetting(st, allCaseConfigs, case, activedDbList):
|
|
59
59
|
)
|
60
60
|
caseConfig = allCaseConfigs[db][case]
|
61
61
|
k = 0
|
62
|
-
for config in CASE_CONFIG_MAP.get(db, {}).get(case, []):
|
62
|
+
for config in CASE_CONFIG_MAP.get(db, {}).get(case.case_cls().label, []):
|
63
63
|
if config.isDisplayed(caseConfig):
|
64
64
|
column = columns[1 + k % CASE_CONFIG_SETTING_COLUMNS]
|
65
65
|
key = "%s-%s-%s" % (db, case, config.label.value)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
from enum import IntEnum
|
2
2
|
import typing
|
3
3
|
from pydantic import BaseModel
|
4
|
-
from vectordb_bench.backend.cases import CaseType
|
4
|
+
from vectordb_bench.backend.cases import CaseLabel, CaseType
|
5
5
|
from vectordb_bench.backend.clients import DB
|
6
6
|
from vectordb_bench.backend.clients.api import IndexType
|
7
7
|
|
@@ -60,6 +60,7 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
|
|
60
60
|
"options": [
|
61
61
|
IndexType.HNSW.value,
|
62
62
|
IndexType.IVFFlat.value,
|
63
|
+
IndexType.IVFSQ8.value,
|
63
64
|
IndexType.DISKANN.value,
|
64
65
|
IndexType.Flat.value,
|
65
66
|
IndexType.AUTOINDEX.value,
|
@@ -197,6 +198,7 @@ CaseConfigParamInput_Nlist = CaseConfigInput(
|
|
197
198
|
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
198
199
|
in [
|
199
200
|
IndexType.IVFFlat.value,
|
201
|
+
IndexType.IVFSQ8.value,
|
200
202
|
IndexType.GPU_IVF_FLAT.value,
|
201
203
|
IndexType.GPU_IVF_PQ.value,
|
202
204
|
],
|
@@ -213,6 +215,7 @@ CaseConfigParamInput_Nprobe = CaseConfigInput(
|
|
213
215
|
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
214
216
|
in [
|
215
217
|
IndexType.IVFFlat.value,
|
218
|
+
IndexType.IVFSQ8.value,
|
216
219
|
IndexType.GPU_IVF_FLAT.value,
|
217
220
|
IndexType.GPU_IVF_PQ.value,
|
218
221
|
],
|
@@ -394,6 +397,11 @@ CaseConfigParamInput_QuantizationType_PgVectoRS = CaseConfigInput(
|
|
394
397
|
inputConfig={
|
395
398
|
"options": ["trivial", "scalar", "product"],
|
396
399
|
},
|
400
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
|
401
|
+
in [
|
402
|
+
IndexType.HNSW.value,
|
403
|
+
IndexType.IVFFlat.value,
|
404
|
+
],
|
397
405
|
)
|
398
406
|
|
399
407
|
CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
@@ -403,7 +411,21 @@ CaseConfigParamInput_QuantizationRatio_PgVectoRS = CaseConfigInput(
|
|
403
411
|
"options": ["x4", "x8", "x16", "x32", "x64"],
|
404
412
|
},
|
405
413
|
isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
|
406
|
-
== "product",
|
414
|
+
== "product" and config.get(CaseConfigParamType.IndexType, None)
|
415
|
+
in [
|
416
|
+
IndexType.HNSW.value,
|
417
|
+
IndexType.IVFFlat.value,
|
418
|
+
],
|
419
|
+
)
|
420
|
+
|
421
|
+
CaseConfigParamInput_ZillizLevel = CaseConfigInput(
|
422
|
+
label=CaseConfigParamType.level,
|
423
|
+
inputType=InputType.Number,
|
424
|
+
inputConfig={
|
425
|
+
"min": 1,
|
426
|
+
"max": 3,
|
427
|
+
"value": 1,
|
428
|
+
},
|
407
429
|
)
|
408
430
|
|
409
431
|
MilvusLoadConfig = [
|
@@ -479,90 +501,32 @@ PgVectoRSPerformanceConfig = [
|
|
479
501
|
CaseConfigParamInput_QuantizationRatio_PgVectoRS,
|
480
502
|
]
|
481
503
|
|
504
|
+
ZillizCloudPerformanceConfig = [
|
505
|
+
CaseConfigParamInput_ZillizLevel,
|
506
|
+
]
|
507
|
+
|
482
508
|
CASE_CONFIG_MAP = {
|
483
509
|
DB.Milvus: {
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
CaseType.Performance768D10M1P: MilvusPerformanceConfig,
|
490
|
-
CaseType.Performance768D1M1P: MilvusPerformanceConfig,
|
491
|
-
CaseType.Performance768D10M99P: MilvusPerformanceConfig,
|
492
|
-
CaseType.Performance768D1M99P: MilvusPerformanceConfig,
|
493
|
-
CaseType.Performance1536D5M: MilvusPerformanceConfig,
|
494
|
-
CaseType.Performance1536D500K: MilvusPerformanceConfig,
|
495
|
-
CaseType.Performance1536D5M1P: MilvusPerformanceConfig,
|
496
|
-
CaseType.Performance1536D500K1P: MilvusPerformanceConfig,
|
497
|
-
CaseType.Performance1536D5M99P: MilvusPerformanceConfig,
|
498
|
-
CaseType.Performance1536D500K99P: MilvusPerformanceConfig,
|
510
|
+
CaseLabel.Load: MilvusLoadConfig,
|
511
|
+
CaseLabel.Performance: MilvusPerformanceConfig,
|
512
|
+
},
|
513
|
+
DB.ZillizCloud: {
|
514
|
+
CaseLabel.Performance: ZillizCloudPerformanceConfig,
|
499
515
|
},
|
500
516
|
DB.WeaviateCloud: {
|
501
|
-
|
502
|
-
|
503
|
-
CaseType.Performance768D100M: WeaviatePerformanceConfig,
|
504
|
-
CaseType.Performance768D10M: WeaviatePerformanceConfig,
|
505
|
-
CaseType.Performance768D1M: WeaviatePerformanceConfig,
|
506
|
-
CaseType.Performance768D10M1P: WeaviatePerformanceConfig,
|
507
|
-
CaseType.Performance768D1M1P: WeaviatePerformanceConfig,
|
508
|
-
CaseType.Performance768D10M99P: WeaviatePerformanceConfig,
|
509
|
-
CaseType.Performance768D1M99P: WeaviatePerformanceConfig,
|
510
|
-
CaseType.Performance1536D5M: WeaviatePerformanceConfig,
|
511
|
-
CaseType.Performance1536D500K: WeaviatePerformanceConfig,
|
512
|
-
CaseType.Performance1536D5M1P: WeaviatePerformanceConfig,
|
513
|
-
CaseType.Performance1536D500K1P: WeaviatePerformanceConfig,
|
514
|
-
CaseType.Performance1536D5M99P: WeaviatePerformanceConfig,
|
515
|
-
CaseType.Performance1536D500K99P: WeaviatePerformanceConfig,
|
517
|
+
CaseLabel.Load: WeaviateLoadConfig,
|
518
|
+
CaseLabel.Performance: WeaviatePerformanceConfig,
|
516
519
|
},
|
517
520
|
DB.ElasticCloud: {
|
518
|
-
|
519
|
-
|
520
|
-
CaseType.Performance768D100M: ESPerformanceConfig,
|
521
|
-
CaseType.Performance768D10M: ESPerformanceConfig,
|
522
|
-
CaseType.Performance768D1M: ESPerformanceConfig,
|
523
|
-
CaseType.Performance768D10M1P: ESPerformanceConfig,
|
524
|
-
CaseType.Performance768D1M1P: ESPerformanceConfig,
|
525
|
-
CaseType.Performance768D10M99P: ESPerformanceConfig,
|
526
|
-
CaseType.Performance768D1M99P: ESPerformanceConfig,
|
527
|
-
CaseType.Performance1536D5M: ESPerformanceConfig,
|
528
|
-
CaseType.Performance1536D500K: ESPerformanceConfig,
|
529
|
-
CaseType.Performance1536D5M1P: ESPerformanceConfig,
|
530
|
-
CaseType.Performance1536D500K1P: ESPerformanceConfig,
|
531
|
-
CaseType.Performance1536D5M99P: ESPerformanceConfig,
|
532
|
-
CaseType.Performance1536D500K99P: ESPerformanceConfig,
|
521
|
+
CaseLabel.Load: ESLoadingConfig,
|
522
|
+
CaseLabel.Performance: ESPerformanceConfig,
|
533
523
|
},
|
534
524
|
DB.PgVector: {
|
535
|
-
|
536
|
-
|
537
|
-
CaseType.Performance768D100M: PgVectorPerformanceConfig,
|
538
|
-
CaseType.Performance768D10M: PgVectorPerformanceConfig,
|
539
|
-
CaseType.Performance768D1M: PgVectorPerformanceConfig,
|
540
|
-
CaseType.Performance768D10M1P: PgVectorPerformanceConfig,
|
541
|
-
CaseType.Performance768D1M1P: PgVectorPerformanceConfig,
|
542
|
-
CaseType.Performance768D10M99P: PgVectorPerformanceConfig,
|
543
|
-
CaseType.Performance768D1M99P: PgVectorPerformanceConfig,
|
544
|
-
CaseType.Performance1536D5M: PgVectorPerformanceConfig,
|
545
|
-
CaseType.Performance1536D500K: PgVectorPerformanceConfig,
|
546
|
-
CaseType.Performance1536D5M1P: PgVectorPerformanceConfig,
|
547
|
-
CaseType.Performance1536D500K1P: PgVectorPerformanceConfig,
|
548
|
-
CaseType.Performance1536D5M99P: PgVectorPerformanceConfig,
|
549
|
-
CaseType.Performance1536D500K99P: PgVectorPerformanceConfig,
|
525
|
+
CaseLabel.Load: PgVectorLoadingConfig,
|
526
|
+
CaseLabel.Performance: PgVectorPerformanceConfig,
|
550
527
|
},
|
551
528
|
DB.PgVectoRS: {
|
552
|
-
|
553
|
-
|
554
|
-
CaseType.Performance768D100M: PgVectoRSPerformanceConfig,
|
555
|
-
CaseType.Performance768D10M: PgVectoRSPerformanceConfig,
|
556
|
-
CaseType.Performance768D1M: PgVectoRSPerformanceConfig,
|
557
|
-
CaseType.Performance768D10M1P: PgVectoRSPerformanceConfig,
|
558
|
-
CaseType.Performance768D1M1P: PgVectoRSPerformanceConfig,
|
559
|
-
CaseType.Performance768D10M99P: PgVectoRSPerformanceConfig,
|
560
|
-
CaseType.Performance768D1M99P: PgVectoRSPerformanceConfig,
|
561
|
-
CaseType.Performance1536D5M: PgVectoRSPerformanceConfig,
|
562
|
-
CaseType.Performance1536D500K: PgVectoRSPerformanceConfig,
|
563
|
-
CaseType.Performance1536D5M1P: PgVectoRSPerformanceConfig,
|
564
|
-
CaseType.Performance1536D500K1P: PgVectoRSPerformanceConfig,
|
565
|
-
CaseType.Performance1536D5M99P: PgVectorPerformanceConfig,
|
566
|
-
CaseType.Performance1536D500K99P: PgVectoRSPerformanceConfig,
|
529
|
+
CaseLabel.Load: PgVectoRSLoadingConfig,
|
530
|
+
CaseLabel.Performance: PgVectoRSPerformanceConfig,
|
567
531
|
},
|
568
532
|
}
|
vectordb_bench/models.py
CHANGED
@@ -20,6 +20,7 @@
|
|
20
20
|
"db_name": "**********"
|
21
21
|
},
|
22
22
|
"db_case_config": {
|
23
|
+
"index": "IVF_FLAT",
|
23
24
|
"metric_type": "L2",
|
24
25
|
"lists": 10,
|
25
26
|
"probes": 2
|
@@ -49,6 +50,7 @@
|
|
49
50
|
"db_name": "**********"
|
50
51
|
},
|
51
52
|
"db_case_config": {
|
53
|
+
"index": "IVF_FLAT",
|
52
54
|
"metric_type": "L2",
|
53
55
|
"lists": 10,
|
54
56
|
"probes": 2
|
@@ -78,6 +80,7 @@
|
|
78
80
|
"db_name": "**********"
|
79
81
|
},
|
80
82
|
"db_case_config": {
|
83
|
+
"index": "IVF_FLAT",
|
81
84
|
"metric_type": "COSINE",
|
82
85
|
"lists": 10,
|
83
86
|
"probes": 2
|
@@ -107,6 +110,7 @@
|
|
107
110
|
"db_name": "**********"
|
108
111
|
},
|
109
112
|
"db_case_config": {
|
113
|
+
"index": "IVF_FLAT",
|
110
114
|
"metric_type": "COSINE",
|
111
115
|
"lists": 10,
|
112
116
|
"probes": 2
|
@@ -136,6 +140,7 @@
|
|
136
140
|
"db_name": "**********"
|
137
141
|
},
|
138
142
|
"db_case_config": {
|
143
|
+
"index": "IVF_FLAT",
|
139
144
|
"metric_type": "COSINE",
|
140
145
|
"lists": 10,
|
141
146
|
"probes": 2
|
@@ -165,6 +170,7 @@
|
|
165
170
|
"db_name": "**********"
|
166
171
|
},
|
167
172
|
"db_case_config": {
|
173
|
+
"index": "IVF_FLAT",
|
168
174
|
"metric_type": "COSINE",
|
169
175
|
"lists": 10,
|
170
176
|
"probes": 2
|
@@ -194,6 +200,7 @@
|
|
194
200
|
"db_name": "**********"
|
195
201
|
},
|
196
202
|
"db_case_config": {
|
203
|
+
"index": "IVF_FLAT",
|
197
204
|
"metric_type": "COSINE",
|
198
205
|
"lists": 10,
|
199
206
|
"probes": 2
|
@@ -223,6 +230,7 @@
|
|
223
230
|
"db_name": "**********"
|
224
231
|
},
|
225
232
|
"db_case_config": {
|
233
|
+
"index": "IVF_FLAT",
|
226
234
|
"metric_type": "COSINE",
|
227
235
|
"lists": 10,
|
228
236
|
"probes": 2
|
@@ -20,6 +20,7 @@
|
|
20
20
|
"db_name": "**********"
|
21
21
|
},
|
22
22
|
"db_case_config": {
|
23
|
+
"index": "IVF_FLAT",
|
23
24
|
"metric_type": "L2",
|
24
25
|
"lists": 10,
|
25
26
|
"probes": 2
|
@@ -51,7 +52,8 @@
|
|
51
52
|
"db_case_config": {
|
52
53
|
"metric_type": "L2",
|
53
54
|
"lists": 10,
|
54
|
-
"probes": 2
|
55
|
+
"probes": 2,
|
56
|
+
"index": "IVF_FLAT"
|
55
57
|
},
|
56
58
|
"case_config": {
|
57
59
|
"case_id": 11,
|
@@ -80,7 +82,8 @@
|
|
80
82
|
"db_case_config": {
|
81
83
|
"metric_type": "L2",
|
82
84
|
"lists": 10,
|
83
|
-
"probes": 2
|
85
|
+
"probes": 2,
|
86
|
+
"index": "IVF_FLAT"
|
84
87
|
},
|
85
88
|
"case_config": {
|
86
89
|
"case_id": 12,
|
@@ -107,6 +110,7 @@
|
|
107
110
|
"db_name": "**********"
|
108
111
|
},
|
109
112
|
"db_case_config": {
|
113
|
+
"index": "IVF_FLAT",
|
110
114
|
"metric_type": "L2",
|
111
115
|
"lists": 10,
|
112
116
|
"probes": 2
|
@@ -136,6 +140,7 @@
|
|
136
140
|
"db_name": "**********"
|
137
141
|
},
|
138
142
|
"db_case_config": {
|
143
|
+
"index": "IVF_FLAT",
|
139
144
|
"metric_type": "L2",
|
140
145
|
"lists": 10,
|
141
146
|
"probes": 2
|
@@ -165,6 +170,7 @@
|
|
165
170
|
"db_name": "**********"
|
166
171
|
},
|
167
172
|
"db_case_config": {
|
173
|
+
"index": "IVF_FLAT",
|
168
174
|
"metric_type": "L2",
|
169
175
|
"lists": 10,
|
170
176
|
"probes": 2
|
@@ -178,4 +184,4 @@
|
|
178
184
|
}
|
179
185
|
],
|
180
186
|
"file_fmt": "result_{}_{}_{}.json"
|
181
|
-
}
|
187
|
+
}
|