vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +14 -27
- vectordb_bench/backend/assembler.py +19 -6
- vectordb_bench/backend/cases.py +186 -23
- vectordb_bench/backend/clients/__init__.py +32 -0
- vectordb_bench/backend/clients/api.py +22 -1
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
- vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
- vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
- vectordb_bench/backend/clients/chroma/chroma.py +6 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
- vectordb_bench/backend/clients/lancedb/cli.py +62 -8
- vectordb_bench/backend/clients/lancedb/config.py +14 -1
- vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
- vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
- vectordb_bench/backend/clients/milvus/cli.py +30 -9
- vectordb_bench/backend/clients/milvus/config.py +3 -0
- vectordb_bench/backend/clients/milvus/milvus.py +81 -23
- vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
- vectordb_bench/backend/clients/oceanbase/config.py +125 -0
- vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
- vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
- vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
- vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
- vectordb_bench/backend/dataset.py +143 -27
- vectordb_bench/backend/filter.py +76 -0
- vectordb_bench/backend/runner/__init__.py +3 -3
- vectordb_bench/backend/runner/mp_runner.py +52 -39
- vectordb_bench/backend/runner/rate_runner.py +68 -52
- vectordb_bench/backend/runner/read_write_runner.py +125 -68
- vectordb_bench/backend/runner/serial_runner.py +56 -23
- vectordb_bench/backend/task_runner.py +48 -20
- vectordb_bench/cli/batch_cli.py +121 -0
- vectordb_bench/cli/cli.py +59 -1
- vectordb_bench/cli/vectordbbench.py +7 -0
- vectordb_bench/config-files/batch_sample_config.yml +17 -0
- vectordb_bench/frontend/components/check_results/data.py +16 -11
- vectordb_bench/frontend/components/check_results/filters.py +53 -25
- vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
- vectordb_bench/frontend/components/check_results/nav.py +20 -0
- vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
- vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
- vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
- vectordb_bench/frontend/components/label_filter/charts.py +60 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
- vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
- vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
- vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
- vectordb_bench/frontend/components/streaming/charts.py +253 -0
- vectordb_bench/frontend/components/streaming/data.py +62 -0
- vectordb_bench/frontend/components/tables/data.py +1 -1
- vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
- vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
- vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
- vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
- vectordb_bench/frontend/config/styles.py +32 -2
- vectordb_bench/frontend/pages/concurrent.py +5 -1
- vectordb_bench/frontend/pages/custom.py +4 -0
- vectordb_bench/frontend/pages/label_filter.py +56 -0
- vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
- vectordb_bench/frontend/pages/results.py +60 -0
- vectordb_bench/frontend/pages/run_test.py +3 -3
- vectordb_bench/frontend/pages/streaming.py +135 -0
- vectordb_bench/frontend/pages/tables.py +4 -0
- vectordb_bench/frontend/vdb_benchmark.py +16 -41
- vectordb_bench/interface.py +6 -2
- vectordb_bench/metric.py +15 -1
- vectordb_bench/models.py +38 -11
- vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
- vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
- vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
- vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
- vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
- vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
- vectordb_bench/results/dbPrices.json +12 -4
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
- vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
- vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
- vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ from pydantic import BaseModel
|
|
4
4
|
from vectordb_bench.backend.cases import CaseLabel, CaseType
|
5
5
|
from vectordb_bench.backend.clients import DB
|
6
6
|
from vectordb_bench.backend.clients.api import IndexType, MetricType, SQType
|
7
|
+
from vectordb_bench.backend.dataset import DatasetWithSizeType
|
7
8
|
from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
|
8
9
|
|
9
10
|
from vectordb_bench.models import CaseConfig, CaseConfigParamType
|
@@ -23,32 +24,58 @@ class BatchCaseConfig(BaseModel):
|
|
23
24
|
cases: list[CaseConfig] = []
|
24
25
|
|
25
26
|
|
27
|
+
class InputType(IntEnum):
|
28
|
+
Text = 20001
|
29
|
+
Number = 20002
|
30
|
+
Option = 20003
|
31
|
+
Float = 20004
|
32
|
+
Bool = 20005
|
33
|
+
|
34
|
+
|
35
|
+
class ConfigInput(BaseModel):
|
36
|
+
label: CaseConfigParamType
|
37
|
+
inputType: InputType = InputType.Text
|
38
|
+
inputConfig: dict = {}
|
39
|
+
inputHelp: str = ""
|
40
|
+
displayLabel: str = ""
|
41
|
+
|
42
|
+
|
43
|
+
class CaseConfigInput(ConfigInput):
|
44
|
+
# todo type should be a function
|
45
|
+
isDisplayed: typing.Any = lambda config: True
|
46
|
+
|
47
|
+
|
26
48
|
class UICaseItem(BaseModel):
|
27
49
|
isLine: bool = False
|
50
|
+
key: str = ""
|
28
51
|
label: str = ""
|
29
52
|
description: str = ""
|
30
53
|
cases: list[CaseConfig] = []
|
31
54
|
caseLabel: CaseLabel = CaseLabel.Performance
|
55
|
+
extra_custom_case_config_inputs: list[ConfigInput] = []
|
56
|
+
tmp_custom_config: dict = dict()
|
32
57
|
|
33
58
|
def __init__(
|
34
59
|
self,
|
35
60
|
isLine: bool = False,
|
36
|
-
|
37
|
-
custom_case: dict | None = None,
|
38
|
-
cases: list[CaseConfig] | None = None,
|
61
|
+
cases: list[CaseConfig] = None,
|
39
62
|
label: str = "",
|
40
63
|
description: str = "",
|
41
64
|
caseLabel: CaseLabel = CaseLabel.Performance,
|
65
|
+
**kwargs,
|
42
66
|
):
|
43
67
|
if isLine is True:
|
44
|
-
super().__init__(isLine=True)
|
45
|
-
|
46
|
-
|
68
|
+
super().__init__(isLine=True, **kwargs)
|
69
|
+
if cases is None:
|
70
|
+
cases = []
|
71
|
+
elif len(cases) == 1:
|
72
|
+
c = cases[0].case
|
47
73
|
super().__init__(
|
48
|
-
label=c.name,
|
49
|
-
description=c.description,
|
50
|
-
cases=
|
51
|
-
caseLabel=
|
74
|
+
label=label if label else c.name,
|
75
|
+
description=description if description else c.description,
|
76
|
+
cases=cases,
|
77
|
+
caseLabel=caseLabel,
|
78
|
+
**kwargs,
|
52
79
|
)
|
53
80
|
else:
|
54
81
|
super().__init__(
|
@@ -56,10 +83,26 @@ class UICaseItem(BaseModel):
|
|
56
83
|
description=description,
|
57
84
|
cases=cases,
|
58
85
|
caseLabel=caseLabel,
|
86
|
+
**kwargs,
|
59
87
|
)
|
60
88
|
|
61
89
|
def __hash__(self) -> int:
|
62
|
-
return hash(self.
|
90
|
+
return hash(self.key if self.key else self.label)
|
91
|
+
|
92
|
+
def get_cases(self) -> list[CaseConfig]:
|
93
|
+
# return self.cases
|
94
|
+
if len(self.extra_custom_case_config_inputs) == 0:
|
95
|
+
return self.cases
|
96
|
+
cases = [
|
97
|
+
CaseConfig(
|
98
|
+
case_id=c.case_id,
|
99
|
+
k=c.k,
|
100
|
+
concurrency_search_config=c.concurrency_search_config,
|
101
|
+
custom_case={**c.custom_case, **self.tmp_custom_config},
|
102
|
+
)
|
103
|
+
for c in self.cases
|
104
|
+
]
|
105
|
+
return cases
|
63
106
|
|
64
107
|
|
65
108
|
class UICaseItemCluster(BaseModel):
|
@@ -70,47 +113,181 @@ class UICaseItemCluster(BaseModel):
|
|
70
113
|
def get_custom_case_items() -> list[UICaseItem]:
|
71
114
|
custom_configs = get_custom_configs()
|
72
115
|
return [
|
73
|
-
UICaseItem(
|
116
|
+
UICaseItem(
|
117
|
+
label=f"{custom_config.dataset_config.name} - None Filter",
|
118
|
+
cases=[
|
119
|
+
CaseConfig(
|
120
|
+
case_id=CaseType.PerformanceCustomDataset,
|
121
|
+
custom_case={
|
122
|
+
**custom_config.dict(),
|
123
|
+
"use_filter": False,
|
124
|
+
},
|
125
|
+
)
|
126
|
+
],
|
127
|
+
)
|
128
|
+
for custom_config in custom_configs
|
129
|
+
] + [
|
130
|
+
UICaseItem(
|
131
|
+
label=f"{custom_config.dataset_config.name} - Filter",
|
132
|
+
description=(
|
133
|
+
f'[Batch Cases] This case evaluate search performance under filtering constraints like "color==red."'
|
134
|
+
f"Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
|
135
|
+
f"such as [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]. "
|
136
|
+
f"Essentially, vdbbench will test each filter label in your own dataset to"
|
137
|
+
" assess the vector database's search performance across different filtering conditions."
|
138
|
+
),
|
139
|
+
cases=[
|
140
|
+
CaseConfig(
|
141
|
+
case_id=CaseType.PerformanceCustomDataset,
|
142
|
+
custom_case={
|
143
|
+
**custom_config.dict(),
|
144
|
+
"use_filter": True,
|
145
|
+
"label_percentage": label_percentage,
|
146
|
+
},
|
147
|
+
)
|
148
|
+
for label_percentage in custom_config.dataset_config.label_percentages
|
149
|
+
],
|
150
|
+
)
|
74
151
|
for custom_config in custom_configs
|
152
|
+
if custom_config.dataset_config.label_percentages
|
75
153
|
]
|
76
154
|
|
77
155
|
|
156
|
+
def generate_normal_cases(case_id: CaseType, custom_case: dict | None = None) -> list[CaseConfig]:
|
157
|
+
return [CaseConfig(case_id=case_id, custom_case=custom_case)]
|
158
|
+
|
159
|
+
|
78
160
|
def get_custom_case_cluter() -> UICaseItemCluster:
|
79
161
|
return UICaseItemCluster(label="Custom Search Performance Test", uiCaseItems=get_custom_case_items())
|
80
162
|
|
81
163
|
|
164
|
+
def generate_custom_streaming_case() -> CaseConfig:
|
165
|
+
return CaseConfig(
|
166
|
+
case_id=CaseType.StreamingPerformanceCase,
|
167
|
+
custom_case=dict(),
|
168
|
+
)
|
169
|
+
|
170
|
+
|
171
|
+
custom_streaming_config: list[ConfigInput] = [
|
172
|
+
ConfigInput(
|
173
|
+
label=CaseConfigParamType.dataset_with_size_type,
|
174
|
+
displayLabel="dataset",
|
175
|
+
inputType=InputType.Option,
|
176
|
+
inputConfig=dict(options=[dataset.value for dataset in DatasetWithSizeType]),
|
177
|
+
),
|
178
|
+
ConfigInput(
|
179
|
+
label=CaseConfigParamType.insert_rate,
|
180
|
+
inputType=InputType.Number,
|
181
|
+
inputConfig=dict(step=100, min=100, max=4_000, value=200),
|
182
|
+
inputHelp="fixed insertion rate (rows/s), must be divisible by 100",
|
183
|
+
),
|
184
|
+
ConfigInput(
|
185
|
+
label=CaseConfigParamType.search_stages,
|
186
|
+
inputType=InputType.Text,
|
187
|
+
inputConfig=dict(value="[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]"),
|
188
|
+
inputHelp="0<=stage<1.0; do search test when inserting a specified amount of data.",
|
189
|
+
),
|
190
|
+
ConfigInput(
|
191
|
+
label=CaseConfigParamType.concurrencies,
|
192
|
+
inputType=InputType.Text,
|
193
|
+
inputConfig=dict(value="[5, 10, 20]"),
|
194
|
+
inputHelp="concurrent num of search test while insertion; record max-qps.",
|
195
|
+
),
|
196
|
+
ConfigInput(
|
197
|
+
label=CaseConfigParamType.optimize_after_write,
|
198
|
+
inputType=InputType.Option,
|
199
|
+
inputConfig=dict(options=[True, False]),
|
200
|
+
inputHelp="whether to optimize after inserting all data",
|
201
|
+
),
|
202
|
+
ConfigInput(
|
203
|
+
label=CaseConfigParamType.read_dur_after_write,
|
204
|
+
inputType=InputType.Number,
|
205
|
+
inputConfig=dict(step=10, min=30, max=360_000, value=30),
|
206
|
+
inputHelp="search test duration after inserting all data",
|
207
|
+
),
|
208
|
+
]
|
209
|
+
|
210
|
+
|
211
|
+
def generate_label_filter_cases(dataset_with_size_type: DatasetWithSizeType) -> list[CaseConfig]:
|
212
|
+
label_percentages = dataset_with_size_type.get_manager().data.scalar_label_percentages
|
213
|
+
return [
|
214
|
+
CaseConfig(
|
215
|
+
case_id=CaseType.LabelFilterPerformanceCase,
|
216
|
+
custom_case=dict(dataset_with_size_type=dataset_with_size_type, label_percentage=label_percentage),
|
217
|
+
)
|
218
|
+
for label_percentage in label_percentages
|
219
|
+
]
|
220
|
+
|
221
|
+
|
82
222
|
UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
|
83
223
|
UICaseItemCluster(
|
84
224
|
label="Search Performance Test",
|
85
225
|
uiCaseItems=[
|
86
|
-
UICaseItem(
|
87
|
-
UICaseItem(
|
88
|
-
UICaseItem(
|
226
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D100M)),
|
227
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M)),
|
228
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M)),
|
89
229
|
UICaseItem(isLine=True),
|
90
|
-
UICaseItem(
|
91
|
-
UICaseItem(
|
92
|
-
UICaseItem(
|
230
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D1M)),
|
231
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D10M)),
|
232
|
+
UICaseItem(isLine=True),
|
233
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M)),
|
234
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K)),
|
235
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D50K)),
|
93
236
|
],
|
94
237
|
),
|
95
238
|
UICaseItemCluster(
|
96
|
-
label="Filter Search Performance Test",
|
239
|
+
label="Int-Filter Search Performance Test",
|
97
240
|
uiCaseItems=[
|
98
|
-
UICaseItem(
|
99
|
-
UICaseItem(
|
100
|
-
UICaseItem(
|
101
|
-
UICaseItem(
|
241
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M1P)),
|
242
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M99P)),
|
243
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M1P)),
|
244
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M99P)),
|
102
245
|
UICaseItem(isLine=True),
|
103
|
-
UICaseItem(
|
104
|
-
UICaseItem(
|
105
|
-
UICaseItem(
|
106
|
-
UICaseItem(
|
246
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M1P)),
|
247
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M99P)),
|
248
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K1P)),
|
249
|
+
UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K99P)),
|
250
|
+
],
|
251
|
+
),
|
252
|
+
UICaseItemCluster(
|
253
|
+
label="Label-Filter Search Performance Test",
|
254
|
+
uiCaseItems=[
|
255
|
+
UICaseItem(
|
256
|
+
label=f"Label-Filter Search Performance Test - {dataset_with_size_type.value}",
|
257
|
+
description=(
|
258
|
+
f'[Batch Cases] These cases evaluate search performance under filtering constraints like "color==red." '
|
259
|
+
"Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
|
260
|
+
f"such as {dataset_with_size_type.get_manager().data.scalar_label_percentages}. "
|
261
|
+
f"Essentially, vdbbench will test each filter label in {dataset_with_size_type.value} to "
|
262
|
+
"assess the vector database's search performance across different filtering conditions. "
|
263
|
+
),
|
264
|
+
cases=generate_label_filter_cases(dataset_with_size_type),
|
265
|
+
)
|
266
|
+
for dataset_with_size_type in DatasetWithSizeType
|
107
267
|
],
|
108
268
|
),
|
109
269
|
UICaseItemCluster(
|
110
270
|
label="Capacity Test",
|
111
271
|
uiCaseItems=[
|
112
|
-
UICaseItem(
|
113
|
-
UICaseItem(
|
272
|
+
UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim960)),
|
273
|
+
UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim128)),
|
274
|
+
],
|
275
|
+
),
|
276
|
+
UICaseItemCluster(
|
277
|
+
label="Streaming Test",
|
278
|
+
uiCaseItems=[
|
279
|
+
UICaseItem(
|
280
|
+
label="Customize Streaming Test",
|
281
|
+
description=(
|
282
|
+
"This case test the search performance during insertion. "
|
283
|
+
"VDBB will send insert requests to VectorDB at a fixed rate and "
|
284
|
+
"conduct a search test once the insert count reaches the search_stages. "
|
285
|
+
"After all data is inserted, optimization and search tests can be "
|
286
|
+
"optionally performed."
|
287
|
+
),
|
288
|
+
cases=[generate_custom_streaming_case()],
|
289
|
+
extra_custom_case_config_inputs=custom_streaming_config,
|
290
|
+
)
|
114
291
|
],
|
115
292
|
),
|
116
293
|
]
|
@@ -123,14 +300,8 @@ DISPLAY_CASE_ORDER: list[CaseType] = [
|
|
123
300
|
CaseType.Performance1536D5M,
|
124
301
|
CaseType.Performance1536D500K,
|
125
302
|
CaseType.Performance1536D50K,
|
126
|
-
CaseType.
|
127
|
-
CaseType.
|
128
|
-
CaseType.Performance1536D5M1P,
|
129
|
-
CaseType.Performance1536D500K1P,
|
130
|
-
CaseType.Performance768D10M99P,
|
131
|
-
CaseType.Performance768D1M99P,
|
132
|
-
CaseType.Performance1536D5M99P,
|
133
|
-
CaseType.Performance1536D500K99P,
|
303
|
+
CaseType.Performance1024D1M,
|
304
|
+
CaseType.Performance1024D10M,
|
134
305
|
CaseType.CapacityDim960,
|
135
306
|
CaseType.CapacityDim128,
|
136
307
|
]
|
@@ -146,6 +317,7 @@ class InputType(IntEnum):
|
|
146
317
|
Option = 20003
|
147
318
|
Float = 20004
|
148
319
|
Bool = 20005
|
320
|
+
Select = 20006
|
149
321
|
|
150
322
|
|
151
323
|
class CaseConfigInput(BaseModel):
|
@@ -454,7 +626,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
|
|
454
626
|
inputConfig={
|
455
627
|
"min": 8,
|
456
628
|
"max": 512,
|
457
|
-
"value":
|
629
|
+
"value": 128,
|
458
630
|
},
|
459
631
|
)
|
460
632
|
|
@@ -482,7 +654,7 @@ CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
|
|
482
654
|
label=CaseConfigParamType.ef_search,
|
483
655
|
inputType=InputType.Number,
|
484
656
|
inputConfig={
|
485
|
-
"min":
|
657
|
+
"min": 1,
|
486
658
|
"max": 1024,
|
487
659
|
"value": 256,
|
488
660
|
},
|
@@ -556,6 +728,67 @@ CaseConfigParamInput_EFConstruction_PgVector = CaseConfigInput(
|
|
556
728
|
isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
|
557
729
|
)
|
558
730
|
|
731
|
+
CaseConfigParamInput_IndexType_ES = CaseConfigInput(
|
732
|
+
label=CaseConfigParamType.IndexType,
|
733
|
+
inputType=InputType.Option,
|
734
|
+
inputConfig={
|
735
|
+
"options": [
|
736
|
+
IndexType.ES_HNSW.value,
|
737
|
+
IndexType.ES_HNSW_INT8.value,
|
738
|
+
IndexType.ES_HNSW_INT4.value,
|
739
|
+
IndexType.ES_HNSW_BBQ.value,
|
740
|
+
],
|
741
|
+
},
|
742
|
+
)
|
743
|
+
|
744
|
+
CaseConfigParamInput_NumShards_ES = CaseConfigInput(
|
745
|
+
label=CaseConfigParamType.number_of_shards,
|
746
|
+
inputType=InputType.Number,
|
747
|
+
inputConfig={
|
748
|
+
"min": 1,
|
749
|
+
"max": 128,
|
750
|
+
"value": 1,
|
751
|
+
},
|
752
|
+
)
|
753
|
+
|
754
|
+
CaseConfigParamInput_NumReplica_ES = CaseConfigInput(
|
755
|
+
label=CaseConfigParamType.number_of_replicas,
|
756
|
+
inputType=InputType.Number,
|
757
|
+
inputConfig={
|
758
|
+
"min": 0,
|
759
|
+
"max": 10,
|
760
|
+
"value": 0,
|
761
|
+
},
|
762
|
+
)
|
763
|
+
|
764
|
+
CaseConfigParamInput_RefreshInterval_ES = CaseConfigInput(
|
765
|
+
label=CaseConfigParamType.refresh_interval,
|
766
|
+
inputType=InputType.Text,
|
767
|
+
inputConfig={"value": "30s"},
|
768
|
+
)
|
769
|
+
|
770
|
+
CaseConfigParamInput_UseRescore_ES = CaseConfigInput(
|
771
|
+
label=CaseConfigParamType.use_rescore,
|
772
|
+
inputType=InputType.Bool,
|
773
|
+
inputConfig={"value": False},
|
774
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) != IndexType.ES_HNSW.value,
|
775
|
+
)
|
776
|
+
|
777
|
+
CaseConfigParamInput_OversampleRatio_ES = CaseConfigInput(
|
778
|
+
label=CaseConfigParamType.oversample_ratio,
|
779
|
+
inputType=InputType.Float,
|
780
|
+
inputConfig={"min": 1.0, "max": 100.0, "value": 2.0},
|
781
|
+
isDisplayed=lambda config: config.get(CaseConfigParamType.use_rescore, False),
|
782
|
+
inputHelp="num_oversample = oversample_ratio * top_k.",
|
783
|
+
)
|
784
|
+
|
785
|
+
CaseConfigParamInput_UseRouting_ES = CaseConfigInput(
|
786
|
+
label=CaseConfigParamType.use_routing,
|
787
|
+
inputType=InputType.Bool,
|
788
|
+
inputConfig={"value": False},
|
789
|
+
inputHelp="Using routing to improve label-filter case performance",
|
790
|
+
)
|
791
|
+
|
559
792
|
|
560
793
|
CaseConfigParamInput_M_ES = CaseConfigInput(
|
561
794
|
label=CaseConfigParamType.M,
|
@@ -563,10 +796,11 @@ CaseConfigParamInput_M_ES = CaseConfigInput(
|
|
563
796
|
inputConfig={
|
564
797
|
"min": 4,
|
565
798
|
"max": 64,
|
566
|
-
"value":
|
799
|
+
"value": 16,
|
567
800
|
},
|
568
801
|
)
|
569
802
|
|
803
|
+
|
570
804
|
CaseConfigParamInput_NumCandidates_ES = CaseConfigInput(
|
571
805
|
label=CaseConfigParamType.numCandidates,
|
572
806
|
inputType=InputType.Number,
|
@@ -1203,6 +1437,13 @@ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
|
|
1203
1437
|
},
|
1204
1438
|
isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
|
1205
1439
|
)
|
1440
|
+
CaseConfigParamInput_Milvus_use_partition_key = CaseConfigInput(
|
1441
|
+
label=CaseConfigParamType.use_partition_key,
|
1442
|
+
inputType=InputType.Option,
|
1443
|
+
inputHelp="whether to use partition_key for label-filter cases. only works in label-filter cases",
|
1444
|
+
inputConfig={"options": [True, False]},
|
1445
|
+
)
|
1446
|
+
|
1206
1447
|
|
1207
1448
|
CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
|
1208
1449
|
label=CaseConfigParamType.mongodb_quantization_type,
|
@@ -1264,6 +1505,87 @@ CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
|
|
1264
1505
|
isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
|
1265
1506
|
)
|
1266
1507
|
|
1508
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch = CaseConfigInput(
|
1509
|
+
label=CaseConfigParamType.index_thread_qty_during_force_merge,
|
1510
|
+
displayLabel="Index Thread Qty During Force Merge",
|
1511
|
+
inputHelp="Thread count during force merge operations",
|
1512
|
+
inputType=InputType.Number,
|
1513
|
+
inputConfig={
|
1514
|
+
"min": 1,
|
1515
|
+
"max": 32,
|
1516
|
+
"value": 4,
|
1517
|
+
},
|
1518
|
+
)
|
1519
|
+
|
1520
|
+
CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch = CaseConfigInput(
|
1521
|
+
label=CaseConfigParamType.number_of_indexing_clients,
|
1522
|
+
displayLabel="Number of Indexing Clients",
|
1523
|
+
inputHelp="Number of concurrent clients for data insertion",
|
1524
|
+
inputType=InputType.Number,
|
1525
|
+
inputConfig={
|
1526
|
+
"min": 1,
|
1527
|
+
"max": 32,
|
1528
|
+
"value": 1,
|
1529
|
+
},
|
1530
|
+
)
|
1531
|
+
|
1532
|
+
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch = CaseConfigInput(
|
1533
|
+
label=CaseConfigParamType.number_of_shards,
|
1534
|
+
displayLabel="Number of Shards",
|
1535
|
+
inputHelp="Number of primary shards for the index",
|
1536
|
+
inputType=InputType.Number,
|
1537
|
+
inputConfig={
|
1538
|
+
"min": 1,
|
1539
|
+
"max": 32,
|
1540
|
+
"value": 1,
|
1541
|
+
},
|
1542
|
+
)
|
1543
|
+
|
1544
|
+
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch = CaseConfigInput(
|
1545
|
+
label=CaseConfigParamType.number_of_replicas,
|
1546
|
+
displayLabel="Number of Replicas",
|
1547
|
+
inputHelp="Number of replica copies for each primary shard",
|
1548
|
+
inputType=InputType.Number,
|
1549
|
+
inputConfig={
|
1550
|
+
"min": 0,
|
1551
|
+
"max": 10,
|
1552
|
+
"value": 1,
|
1553
|
+
},
|
1554
|
+
)
|
1555
|
+
|
1556
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch = CaseConfigInput(
|
1557
|
+
label=CaseConfigParamType.index_thread_qty,
|
1558
|
+
displayLabel="Index Thread Qty",
|
1559
|
+
inputHelp="Thread count for native engine indexing",
|
1560
|
+
inputType=InputType.Number,
|
1561
|
+
inputConfig={
|
1562
|
+
"min": 1,
|
1563
|
+
"max": 32,
|
1564
|
+
"value": 4,
|
1565
|
+
},
|
1566
|
+
)
|
1567
|
+
|
1568
|
+
CaseConfigParamInput_ENGINE_NAME_AWSOpensearch = CaseConfigInput(
|
1569
|
+
label=CaseConfigParamType.engine_name,
|
1570
|
+
displayLabel="Engine",
|
1571
|
+
inputHelp="HNSW algorithm implementation to use",
|
1572
|
+
inputType=InputType.Option,
|
1573
|
+
inputConfig={
|
1574
|
+
"options": ["faiss", "nmslib", "lucene"],
|
1575
|
+
"default": "faiss",
|
1576
|
+
},
|
1577
|
+
)
|
1578
|
+
|
1579
|
+
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch = CaseConfigInput(
|
1580
|
+
label=CaseConfigParamType.metric_type_name,
|
1581
|
+
displayLabel="Metric Type",
|
1582
|
+
inputHelp="Distance metric type for vector similarity",
|
1583
|
+
inputType=InputType.Option,
|
1584
|
+
inputConfig={
|
1585
|
+
"options": ["l2", "cosine", "ip"],
|
1586
|
+
"default": "l2",
|
1587
|
+
},
|
1588
|
+
)
|
1267
1589
|
|
1268
1590
|
MilvusLoadConfig = [
|
1269
1591
|
CaseConfigParamInput_IndexType,
|
@@ -1280,6 +1602,7 @@ MilvusLoadConfig = [
|
|
1280
1602
|
CaseConfigParamInput_Refine,
|
1281
1603
|
CaseConfigParamInput_RefineType,
|
1282
1604
|
CaseConfigParamInput_NRQ,
|
1605
|
+
CaseConfigParamInput_Milvus_use_partition_key,
|
1283
1606
|
]
|
1284
1607
|
MilvusPerformanceConfig = [
|
1285
1608
|
CaseConfigParamInput_IndexType,
|
@@ -1307,6 +1630,7 @@ MilvusPerformanceConfig = [
|
|
1307
1630
|
CaseConfigParamInput_Refine,
|
1308
1631
|
CaseConfigParamInput_RefineType,
|
1309
1632
|
CaseConfigParamInput_RefineK,
|
1633
|
+
CaseConfigParamInput_Milvus_use_partition_key,
|
1310
1634
|
]
|
1311
1635
|
|
1312
1636
|
WeaviateLoadConfig = [
|
@@ -1319,11 +1643,25 @@ WeaviatePerformanceConfig = [
|
|
1319
1643
|
CaseConfigParamInput_EF_Weaviate,
|
1320
1644
|
]
|
1321
1645
|
|
1322
|
-
ESLoadingConfig = [
|
1646
|
+
ESLoadingConfig = [
|
1647
|
+
CaseConfigParamInput_IndexType_ES,
|
1648
|
+
CaseConfigParamInput_NumShards_ES,
|
1649
|
+
CaseConfigParamInput_NumReplica_ES,
|
1650
|
+
CaseConfigParamInput_RefreshInterval_ES,
|
1651
|
+
CaseConfigParamInput_EFConstruction_ES,
|
1652
|
+
CaseConfigParamInput_M_ES,
|
1653
|
+
]
|
1323
1654
|
ESPerformanceConfig = [
|
1655
|
+
CaseConfigParamInput_IndexType_ES,
|
1656
|
+
CaseConfigParamInput_NumShards_ES,
|
1657
|
+
CaseConfigParamInput_NumReplica_ES,
|
1658
|
+
CaseConfigParamInput_RefreshInterval_ES,
|
1324
1659
|
CaseConfigParamInput_EFConstruction_ES,
|
1325
1660
|
CaseConfigParamInput_M_ES,
|
1326
1661
|
CaseConfigParamInput_NumCandidates_ES,
|
1662
|
+
CaseConfigParamInput_UseRescore_ES,
|
1663
|
+
CaseConfigParamInput_OversampleRatio_ES,
|
1664
|
+
CaseConfigParamInput_UseRouting_ES,
|
1327
1665
|
]
|
1328
1666
|
|
1329
1667
|
AWSOpensearchLoadingConfig = [
|
@@ -1612,10 +1950,37 @@ LanceDBLoadConfig = [
|
|
1612
1950
|
|
1613
1951
|
LanceDBPerformanceConfig = LanceDBLoadConfig
|
1614
1952
|
|
1953
|
+
AWSOpensearchLoadingConfig = [
|
1954
|
+
CaseConfigParamInput_EFConstruction_AWSOpensearch,
|
1955
|
+
CaseConfigParamInput_M_AWSOpensearch,
|
1956
|
+
CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
|
1957
|
+
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
|
1958
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
|
1959
|
+
CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
|
1960
|
+
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
|
1961
|
+
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
|
1962
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
|
1963
|
+
]
|
1964
|
+
|
1965
|
+
AWSOpenSearchPerformanceConfig = [
|
1966
|
+
CaseConfigParamInput_EFConstruction_AWSOpensearch,
|
1967
|
+
CaseConfigParamInput_M_AWSOpensearch,
|
1968
|
+
CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
|
1969
|
+
CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
|
1970
|
+
CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
|
1971
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
|
1972
|
+
CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
|
1973
|
+
CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
|
1974
|
+
CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
|
1975
|
+
CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
|
1976
|
+
]
|
1977
|
+
|
1978
|
+
# Map DB to config
|
1615
1979
|
CASE_CONFIG_MAP = {
|
1616
1980
|
DB.Milvus: {
|
1617
1981
|
CaseLabel.Load: MilvusLoadConfig,
|
1618
1982
|
CaseLabel.Performance: MilvusPerformanceConfig,
|
1983
|
+
CaseLabel.Streaming: MilvusPerformanceConfig,
|
1619
1984
|
},
|
1620
1985
|
DB.ZillizCloud: {
|
1621
1986
|
CaseLabel.Performance: ZillizCloudPerformanceConfig,
|
@@ -1676,4 +2041,18 @@ CASE_CONFIG_MAP = {
|
|
1676
2041
|
CaseLabel.Load: LanceDBLoadConfig,
|
1677
2042
|
CaseLabel.Performance: LanceDBPerformanceConfig,
|
1678
2043
|
},
|
2044
|
+
DB.AWSOpenSearch: {
|
2045
|
+
CaseLabel.Load: AWSOpensearchLoadingConfig,
|
2046
|
+
CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
|
2047
|
+
},
|
1679
2048
|
}
|
2049
|
+
|
2050
|
+
|
2051
|
+
def get_case_config_inputs(db: DB, case_label: CaseLabel) -> list[CaseConfigInput]:
|
2052
|
+
if db not in CASE_CONFIG_MAP:
|
2053
|
+
return []
|
2054
|
+
if case_label == CaseLabel.Load:
|
2055
|
+
return CASE_CONFIG_MAP[db][CaseLabel.Load]
|
2056
|
+
elif case_label == CaseLabel.Performance or case_label == CaseLabel.Streaming:
|
2057
|
+
return CASE_CONFIG_MAP[db][CaseLabel.Performance]
|
2058
|
+
return []
|