vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (90)
  1. vectordb_bench/__init__.py +14 -27
  2. vectordb_bench/backend/assembler.py +19 -6
  3. vectordb_bench/backend/cases.py +186 -23
  4. vectordb_bench/backend/clients/__init__.py +32 -0
  5. vectordb_bench/backend/clients/api.py +22 -1
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
  7. vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
  8. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
  9. vectordb_bench/backend/clients/chroma/chroma.py +6 -2
  10. vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
  11. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
  12. vectordb_bench/backend/clients/lancedb/cli.py +62 -8
  13. vectordb_bench/backend/clients/lancedb/config.py +14 -1
  14. vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
  15. vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
  16. vectordb_bench/backend/clients/milvus/cli.py +30 -9
  17. vectordb_bench/backend/clients/milvus/config.py +3 -0
  18. vectordb_bench/backend/clients/milvus/milvus.py +81 -23
  19. vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
  20. vectordb_bench/backend/clients/oceanbase/config.py +125 -0
  21. vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
  22. vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
  23. vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
  24. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
  25. vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
  26. vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
  27. vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
  28. vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
  29. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
  30. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
  31. vectordb_bench/backend/dataset.py +143 -27
  32. vectordb_bench/backend/filter.py +76 -0
  33. vectordb_bench/backend/runner/__init__.py +3 -3
  34. vectordb_bench/backend/runner/mp_runner.py +52 -39
  35. vectordb_bench/backend/runner/rate_runner.py +68 -52
  36. vectordb_bench/backend/runner/read_write_runner.py +125 -68
  37. vectordb_bench/backend/runner/serial_runner.py +56 -23
  38. vectordb_bench/backend/task_runner.py +48 -20
  39. vectordb_bench/cli/batch_cli.py +121 -0
  40. vectordb_bench/cli/cli.py +59 -1
  41. vectordb_bench/cli/vectordbbench.py +7 -0
  42. vectordb_bench/config-files/batch_sample_config.yml +17 -0
  43. vectordb_bench/frontend/components/check_results/data.py +16 -11
  44. vectordb_bench/frontend/components/check_results/filters.py +53 -25
  45. vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
  46. vectordb_bench/frontend/components/check_results/nav.py +20 -0
  47. vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
  48. vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
  49. vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
  50. vectordb_bench/frontend/components/label_filter/charts.py +60 -0
  51. vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
  52. vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
  53. vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
  54. vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
  55. vectordb_bench/frontend/components/streaming/charts.py +253 -0
  56. vectordb_bench/frontend/components/streaming/data.py +62 -0
  57. vectordb_bench/frontend/components/tables/data.py +1 -1
  58. vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
  59. vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
  60. vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
  61. vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
  62. vectordb_bench/frontend/config/styles.py +32 -2
  63. vectordb_bench/frontend/pages/concurrent.py +5 -1
  64. vectordb_bench/frontend/pages/custom.py +4 -0
  65. vectordb_bench/frontend/pages/label_filter.py +56 -0
  66. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
  67. vectordb_bench/frontend/pages/results.py +60 -0
  68. vectordb_bench/frontend/pages/run_test.py +3 -3
  69. vectordb_bench/frontend/pages/streaming.py +135 -0
  70. vectordb_bench/frontend/pages/tables.py +4 -0
  71. vectordb_bench/frontend/vdb_benchmark.py +16 -41
  72. vectordb_bench/interface.py +6 -2
  73. vectordb_bench/metric.py +15 -1
  74. vectordb_bench/models.py +38 -11
  75. vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
  76. vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
  77. vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
  78. vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
  79. vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
  80. vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
  81. vectordb_bench/results/dbPrices.json +12 -4
  82. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
  83. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
  84. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
  85. vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
  86. vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
  87. vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
  88. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
  89. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
  90. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ from pydantic import BaseModel
4
4
  from vectordb_bench.backend.cases import CaseLabel, CaseType
5
5
  from vectordb_bench.backend.clients import DB
6
6
  from vectordb_bench.backend.clients.api import IndexType, MetricType, SQType
7
+ from vectordb_bench.backend.dataset import DatasetWithSizeType
7
8
  from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
8
9
 
9
10
  from vectordb_bench.models import CaseConfig, CaseConfigParamType
@@ -23,32 +24,58 @@ class BatchCaseConfig(BaseModel):
23
24
  cases: list[CaseConfig] = []
24
25
 
25
26
 
27
+ class InputType(IntEnum):
28
+ Text = 20001
29
+ Number = 20002
30
+ Option = 20003
31
+ Float = 20004
32
+ Bool = 20005
33
+
34
+
35
+ class ConfigInput(BaseModel):
36
+ label: CaseConfigParamType
37
+ inputType: InputType = InputType.Text
38
+ inputConfig: dict = {}
39
+ inputHelp: str = ""
40
+ displayLabel: str = ""
41
+
42
+
43
+ class CaseConfigInput(ConfigInput):
44
+ # todo type should be a function
45
+ isDisplayed: typing.Any = lambda config: True
46
+
47
+
26
48
  class UICaseItem(BaseModel):
27
49
  isLine: bool = False
50
+ key: str = ""
28
51
  label: str = ""
29
52
  description: str = ""
30
53
  cases: list[CaseConfig] = []
31
54
  caseLabel: CaseLabel = CaseLabel.Performance
55
+ extra_custom_case_config_inputs: list[ConfigInput] = []
56
+ tmp_custom_config: dict = dict()
32
57
 
33
58
  def __init__(
34
59
  self,
35
60
  isLine: bool = False,
36
- case_id: CaseType | None = None,
37
- custom_case: dict | None = None,
38
- cases: list[CaseConfig] | None = None,
61
+ cases: list[CaseConfig] = None,
39
62
  label: str = "",
40
63
  description: str = "",
41
64
  caseLabel: CaseLabel = CaseLabel.Performance,
65
+ **kwargs,
42
66
  ):
43
67
  if isLine is True:
44
- super().__init__(isLine=True)
45
- elif case_id is not None and isinstance(case_id, CaseType):
46
- c = case_id.case_cls(custom_case)
68
+ super().__init__(isLine=True, **kwargs)
69
+ if cases is None:
70
+ cases = []
71
+ elif len(cases) == 1:
72
+ c = cases[0].case
47
73
  super().__init__(
48
- label=c.name,
49
- description=c.description,
50
- cases=[CaseConfig(case_id=case_id, custom_case=custom_case)],
51
- caseLabel=c.label,
74
+ label=label if label else c.name,
75
+ description=description if description else c.description,
76
+ cases=cases,
77
+ caseLabel=caseLabel,
78
+ **kwargs,
52
79
  )
53
80
  else:
54
81
  super().__init__(
@@ -56,10 +83,26 @@ class UICaseItem(BaseModel):
56
83
  description=description,
57
84
  cases=cases,
58
85
  caseLabel=caseLabel,
86
+ **kwargs,
59
87
  )
60
88
 
61
89
  def __hash__(self) -> int:
62
- return hash(self.json())
90
+ return hash(self.key if self.key else self.label)
91
+
92
+ def get_cases(self) -> list[CaseConfig]:
93
+ # return self.cases
94
+ if len(self.extra_custom_case_config_inputs) == 0:
95
+ return self.cases
96
+ cases = [
97
+ CaseConfig(
98
+ case_id=c.case_id,
99
+ k=c.k,
100
+ concurrency_search_config=c.concurrency_search_config,
101
+ custom_case={**c.custom_case, **self.tmp_custom_config},
102
+ )
103
+ for c in self.cases
104
+ ]
105
+ return cases
63
106
 
64
107
 
65
108
  class UICaseItemCluster(BaseModel):
@@ -70,47 +113,181 @@ class UICaseItemCluster(BaseModel):
70
113
  def get_custom_case_items() -> list[UICaseItem]:
71
114
  custom_configs = get_custom_configs()
72
115
  return [
73
- UICaseItem(case_id=CaseType.PerformanceCustomDataset, custom_case=custom_config.dict())
116
+ UICaseItem(
117
+ label=f"{custom_config.dataset_config.name} - None Filter",
118
+ cases=[
119
+ CaseConfig(
120
+ case_id=CaseType.PerformanceCustomDataset,
121
+ custom_case={
122
+ **custom_config.dict(),
123
+ "use_filter": False,
124
+ },
125
+ )
126
+ ],
127
+ )
128
+ for custom_config in custom_configs
129
+ ] + [
130
+ UICaseItem(
131
+ label=f"{custom_config.dataset_config.name} - Filter",
132
+ description=(
133
+ f'[Batch Cases] This case evaluate search performance under filtering constraints like "color==red."'
134
+ f"Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
135
+ f"such as [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]. "
136
+ f"Essentially, vdbbench will test each filter label in your own dataset to"
137
+ " assess the vector database's search performance across different filtering conditions."
138
+ ),
139
+ cases=[
140
+ CaseConfig(
141
+ case_id=CaseType.PerformanceCustomDataset,
142
+ custom_case={
143
+ **custom_config.dict(),
144
+ "use_filter": True,
145
+ "label_percentage": label_percentage,
146
+ },
147
+ )
148
+ for label_percentage in custom_config.dataset_config.label_percentages
149
+ ],
150
+ )
74
151
  for custom_config in custom_configs
152
+ if custom_config.dataset_config.label_percentages
75
153
  ]
76
154
 
77
155
 
156
+ def generate_normal_cases(case_id: CaseType, custom_case: dict | None = None) -> list[CaseConfig]:
157
+ return [CaseConfig(case_id=case_id, custom_case=custom_case)]
158
+
159
+
78
160
  def get_custom_case_cluter() -> UICaseItemCluster:
79
161
  return UICaseItemCluster(label="Custom Search Performance Test", uiCaseItems=get_custom_case_items())
80
162
 
81
163
 
164
+ def generate_custom_streaming_case() -> CaseConfig:
165
+ return CaseConfig(
166
+ case_id=CaseType.StreamingPerformanceCase,
167
+ custom_case=dict(),
168
+ )
169
+
170
+
171
+ custom_streaming_config: list[ConfigInput] = [
172
+ ConfigInput(
173
+ label=CaseConfigParamType.dataset_with_size_type,
174
+ displayLabel="dataset",
175
+ inputType=InputType.Option,
176
+ inputConfig=dict(options=[dataset.value for dataset in DatasetWithSizeType]),
177
+ ),
178
+ ConfigInput(
179
+ label=CaseConfigParamType.insert_rate,
180
+ inputType=InputType.Number,
181
+ inputConfig=dict(step=100, min=100, max=4_000, value=200),
182
+ inputHelp="fixed insertion rate (rows/s), must be divisible by 100",
183
+ ),
184
+ ConfigInput(
185
+ label=CaseConfigParamType.search_stages,
186
+ inputType=InputType.Text,
187
+ inputConfig=dict(value="[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]"),
188
+ inputHelp="0<=stage<1.0; do search test when inserting a specified amount of data.",
189
+ ),
190
+ ConfigInput(
191
+ label=CaseConfigParamType.concurrencies,
192
+ inputType=InputType.Text,
193
+ inputConfig=dict(value="[5, 10, 20]"),
194
+ inputHelp="concurrent num of search test while insertion; record max-qps.",
195
+ ),
196
+ ConfigInput(
197
+ label=CaseConfigParamType.optimize_after_write,
198
+ inputType=InputType.Option,
199
+ inputConfig=dict(options=[True, False]),
200
+ inputHelp="whether to optimize after inserting all data",
201
+ ),
202
+ ConfigInput(
203
+ label=CaseConfigParamType.read_dur_after_write,
204
+ inputType=InputType.Number,
205
+ inputConfig=dict(step=10, min=30, max=360_000, value=30),
206
+ inputHelp="search test duration after inserting all data",
207
+ ),
208
+ ]
209
+
210
+
211
+ def generate_label_filter_cases(dataset_with_size_type: DatasetWithSizeType) -> list[CaseConfig]:
212
+ label_percentages = dataset_with_size_type.get_manager().data.scalar_label_percentages
213
+ return [
214
+ CaseConfig(
215
+ case_id=CaseType.LabelFilterPerformanceCase,
216
+ custom_case=dict(dataset_with_size_type=dataset_with_size_type, label_percentage=label_percentage),
217
+ )
218
+ for label_percentage in label_percentages
219
+ ]
220
+
221
+
82
222
  UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
83
223
  UICaseItemCluster(
84
224
  label="Search Performance Test",
85
225
  uiCaseItems=[
86
- UICaseItem(case_id=CaseType.Performance768D100M),
87
- UICaseItem(case_id=CaseType.Performance768D10M),
88
- UICaseItem(case_id=CaseType.Performance768D1M),
226
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D100M)),
227
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M)),
228
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M)),
89
229
  UICaseItem(isLine=True),
90
- UICaseItem(case_id=CaseType.Performance1536D5M),
91
- UICaseItem(case_id=CaseType.Performance1536D500K),
92
- UICaseItem(case_id=CaseType.Performance1536D50K),
230
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D1M)),
231
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D10M)),
232
+ UICaseItem(isLine=True),
233
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M)),
234
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K)),
235
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D50K)),
93
236
  ],
94
237
  ),
95
238
  UICaseItemCluster(
96
- label="Filter Search Performance Test",
239
+ label="Int-Filter Search Performance Test",
97
240
  uiCaseItems=[
98
- UICaseItem(case_id=CaseType.Performance768D10M1P),
99
- UICaseItem(case_id=CaseType.Performance768D10M99P),
100
- UICaseItem(case_id=CaseType.Performance768D1M1P),
101
- UICaseItem(case_id=CaseType.Performance768D1M99P),
241
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M1P)),
242
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M99P)),
243
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M1P)),
244
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M99P)),
102
245
  UICaseItem(isLine=True),
103
- UICaseItem(case_id=CaseType.Performance1536D5M1P),
104
- UICaseItem(case_id=CaseType.Performance1536D5M99P),
105
- UICaseItem(case_id=CaseType.Performance1536D500K1P),
106
- UICaseItem(case_id=CaseType.Performance1536D500K99P),
246
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M1P)),
247
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M99P)),
248
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K1P)),
249
+ UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K99P)),
250
+ ],
251
+ ),
252
+ UICaseItemCluster(
253
+ label="Label-Filter Search Performance Test",
254
+ uiCaseItems=[
255
+ UICaseItem(
256
+ label=f"Label-Filter Search Performance Test - {dataset_with_size_type.value}",
257
+ description=(
258
+ f'[Batch Cases] These cases evaluate search performance under filtering constraints like "color==red." '
259
+ "Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
260
+ f"such as {dataset_with_size_type.get_manager().data.scalar_label_percentages}. "
261
+ f"Essentially, vdbbench will test each filter label in {dataset_with_size_type.value} to "
262
+ "assess the vector database's search performance across different filtering conditions. "
263
+ ),
264
+ cases=generate_label_filter_cases(dataset_with_size_type),
265
+ )
266
+ for dataset_with_size_type in DatasetWithSizeType
107
267
  ],
108
268
  ),
109
269
  UICaseItemCluster(
110
270
  label="Capacity Test",
111
271
  uiCaseItems=[
112
- UICaseItem(case_id=CaseType.CapacityDim960),
113
- UICaseItem(case_id=CaseType.CapacityDim128),
272
+ UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim960)),
273
+ UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim128)),
274
+ ],
275
+ ),
276
+ UICaseItemCluster(
277
+ label="Streaming Test",
278
+ uiCaseItems=[
279
+ UICaseItem(
280
+ label="Customize Streaming Test",
281
+ description=(
282
+ "This case test the search performance during insertion. "
283
+ "VDBB will send insert requests to VectorDB at a fixed rate and "
284
+ "conduct a search test once the insert count reaches the search_stages. "
285
+ "After all data is inserted, optimization and search tests can be "
286
+ "optionally performed."
287
+ ),
288
+ cases=[generate_custom_streaming_case()],
289
+ extra_custom_case_config_inputs=custom_streaming_config,
290
+ )
114
291
  ],
115
292
  ),
116
293
  ]
@@ -123,14 +300,8 @@ DISPLAY_CASE_ORDER: list[CaseType] = [
123
300
  CaseType.Performance1536D5M,
124
301
  CaseType.Performance1536D500K,
125
302
  CaseType.Performance1536D50K,
126
- CaseType.Performance768D10M1P,
127
- CaseType.Performance768D1M1P,
128
- CaseType.Performance1536D5M1P,
129
- CaseType.Performance1536D500K1P,
130
- CaseType.Performance768D10M99P,
131
- CaseType.Performance768D1M99P,
132
- CaseType.Performance1536D5M99P,
133
- CaseType.Performance1536D500K99P,
303
+ CaseType.Performance1024D1M,
304
+ CaseType.Performance1024D10M,
134
305
  CaseType.CapacityDim960,
135
306
  CaseType.CapacityDim128,
136
307
  ]
@@ -146,6 +317,7 @@ class InputType(IntEnum):
146
317
  Option = 20003
147
318
  Float = 20004
148
319
  Bool = 20005
320
+ Select = 20006
149
321
 
150
322
 
151
323
  class CaseConfigInput(BaseModel):
@@ -454,7 +626,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
454
626
  inputConfig={
455
627
  "min": 8,
456
628
  "max": 512,
457
- "value": 360,
629
+ "value": 128,
458
630
  },
459
631
  )
460
632
 
@@ -482,7 +654,7 @@ CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
482
654
  label=CaseConfigParamType.ef_search,
483
655
  inputType=InputType.Number,
484
656
  inputConfig={
485
- "min": 100,
657
+ "min": 1,
486
658
  "max": 1024,
487
659
  "value": 256,
488
660
  },
@@ -556,6 +728,67 @@ CaseConfigParamInput_EFConstruction_PgVector = CaseConfigInput(
556
728
  isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
557
729
  )
558
730
 
731
+ CaseConfigParamInput_IndexType_ES = CaseConfigInput(
732
+ label=CaseConfigParamType.IndexType,
733
+ inputType=InputType.Option,
734
+ inputConfig={
735
+ "options": [
736
+ IndexType.ES_HNSW.value,
737
+ IndexType.ES_HNSW_INT8.value,
738
+ IndexType.ES_HNSW_INT4.value,
739
+ IndexType.ES_HNSW_BBQ.value,
740
+ ],
741
+ },
742
+ )
743
+
744
+ CaseConfigParamInput_NumShards_ES = CaseConfigInput(
745
+ label=CaseConfigParamType.number_of_shards,
746
+ inputType=InputType.Number,
747
+ inputConfig={
748
+ "min": 1,
749
+ "max": 128,
750
+ "value": 1,
751
+ },
752
+ )
753
+
754
+ CaseConfigParamInput_NumReplica_ES = CaseConfigInput(
755
+ label=CaseConfigParamType.number_of_replicas,
756
+ inputType=InputType.Number,
757
+ inputConfig={
758
+ "min": 0,
759
+ "max": 10,
760
+ "value": 0,
761
+ },
762
+ )
763
+
764
+ CaseConfigParamInput_RefreshInterval_ES = CaseConfigInput(
765
+ label=CaseConfigParamType.refresh_interval,
766
+ inputType=InputType.Text,
767
+ inputConfig={"value": "30s"},
768
+ )
769
+
770
+ CaseConfigParamInput_UseRescore_ES = CaseConfigInput(
771
+ label=CaseConfigParamType.use_rescore,
772
+ inputType=InputType.Bool,
773
+ inputConfig={"value": False},
774
+ isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) != IndexType.ES_HNSW.value,
775
+ )
776
+
777
+ CaseConfigParamInput_OversampleRatio_ES = CaseConfigInput(
778
+ label=CaseConfigParamType.oversample_ratio,
779
+ inputType=InputType.Float,
780
+ inputConfig={"min": 1.0, "max": 100.0, "value": 2.0},
781
+ isDisplayed=lambda config: config.get(CaseConfigParamType.use_rescore, False),
782
+ inputHelp="num_oversample = oversample_ratio * top_k.",
783
+ )
784
+
785
+ CaseConfigParamInput_UseRouting_ES = CaseConfigInput(
786
+ label=CaseConfigParamType.use_routing,
787
+ inputType=InputType.Bool,
788
+ inputConfig={"value": False},
789
+ inputHelp="Using routing to improve label-filter case performance",
790
+ )
791
+
559
792
 
560
793
  CaseConfigParamInput_M_ES = CaseConfigInput(
561
794
  label=CaseConfigParamType.M,
@@ -563,10 +796,11 @@ CaseConfigParamInput_M_ES = CaseConfigInput(
563
796
  inputConfig={
564
797
  "min": 4,
565
798
  "max": 64,
566
- "value": 30,
799
+ "value": 16,
567
800
  },
568
801
  )
569
802
 
803
+
570
804
  CaseConfigParamInput_NumCandidates_ES = CaseConfigInput(
571
805
  label=CaseConfigParamType.numCandidates,
572
806
  inputType=InputType.Number,
@@ -1203,6 +1437,13 @@ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
1203
1437
  },
1204
1438
  isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
1205
1439
  )
1440
+ CaseConfigParamInput_Milvus_use_partition_key = CaseConfigInput(
1441
+ label=CaseConfigParamType.use_partition_key,
1442
+ inputType=InputType.Option,
1443
+ inputHelp="whether to use partition_key for label-filter cases. only works in label-filter cases",
1444
+ inputConfig={"options": [True, False]},
1445
+ )
1446
+
1206
1447
 
1207
1448
  CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
1208
1449
  label=CaseConfigParamType.mongodb_quantization_type,
@@ -1264,6 +1505,87 @@ CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
1264
1505
  isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
1265
1506
  )
1266
1507
 
1508
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch = CaseConfigInput(
1509
+ label=CaseConfigParamType.index_thread_qty_during_force_merge,
1510
+ displayLabel="Index Thread Qty During Force Merge",
1511
+ inputHelp="Thread count during force merge operations",
1512
+ inputType=InputType.Number,
1513
+ inputConfig={
1514
+ "min": 1,
1515
+ "max": 32,
1516
+ "value": 4,
1517
+ },
1518
+ )
1519
+
1520
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch = CaseConfigInput(
1521
+ label=CaseConfigParamType.number_of_indexing_clients,
1522
+ displayLabel="Number of Indexing Clients",
1523
+ inputHelp="Number of concurrent clients for data insertion",
1524
+ inputType=InputType.Number,
1525
+ inputConfig={
1526
+ "min": 1,
1527
+ "max": 32,
1528
+ "value": 1,
1529
+ },
1530
+ )
1531
+
1532
+ CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch = CaseConfigInput(
1533
+ label=CaseConfigParamType.number_of_shards,
1534
+ displayLabel="Number of Shards",
1535
+ inputHelp="Number of primary shards for the index",
1536
+ inputType=InputType.Number,
1537
+ inputConfig={
1538
+ "min": 1,
1539
+ "max": 32,
1540
+ "value": 1,
1541
+ },
1542
+ )
1543
+
1544
+ CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch = CaseConfigInput(
1545
+ label=CaseConfigParamType.number_of_replicas,
1546
+ displayLabel="Number of Replicas",
1547
+ inputHelp="Number of replica copies for each primary shard",
1548
+ inputType=InputType.Number,
1549
+ inputConfig={
1550
+ "min": 0,
1551
+ "max": 10,
1552
+ "value": 1,
1553
+ },
1554
+ )
1555
+
1556
+ CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch = CaseConfigInput(
1557
+ label=CaseConfigParamType.index_thread_qty,
1558
+ displayLabel="Index Thread Qty",
1559
+ inputHelp="Thread count for native engine indexing",
1560
+ inputType=InputType.Number,
1561
+ inputConfig={
1562
+ "min": 1,
1563
+ "max": 32,
1564
+ "value": 4,
1565
+ },
1566
+ )
1567
+
1568
+ CaseConfigParamInput_ENGINE_NAME_AWSOpensearch = CaseConfigInput(
1569
+ label=CaseConfigParamType.engine_name,
1570
+ displayLabel="Engine",
1571
+ inputHelp="HNSW algorithm implementation to use",
1572
+ inputType=InputType.Option,
1573
+ inputConfig={
1574
+ "options": ["faiss", "nmslib", "lucene"],
1575
+ "default": "faiss",
1576
+ },
1577
+ )
1578
+
1579
+ CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch = CaseConfigInput(
1580
+ label=CaseConfigParamType.metric_type_name,
1581
+ displayLabel="Metric Type",
1582
+ inputHelp="Distance metric type for vector similarity",
1583
+ inputType=InputType.Option,
1584
+ inputConfig={
1585
+ "options": ["l2", "cosine", "ip"],
1586
+ "default": "l2",
1587
+ },
1588
+ )
1267
1589
 
1268
1590
  MilvusLoadConfig = [
1269
1591
  CaseConfigParamInput_IndexType,
@@ -1280,6 +1602,7 @@ MilvusLoadConfig = [
1280
1602
  CaseConfigParamInput_Refine,
1281
1603
  CaseConfigParamInput_RefineType,
1282
1604
  CaseConfigParamInput_NRQ,
1605
+ CaseConfigParamInput_Milvus_use_partition_key,
1283
1606
  ]
1284
1607
  MilvusPerformanceConfig = [
1285
1608
  CaseConfigParamInput_IndexType,
@@ -1307,6 +1630,7 @@ MilvusPerformanceConfig = [
1307
1630
  CaseConfigParamInput_Refine,
1308
1631
  CaseConfigParamInput_RefineType,
1309
1632
  CaseConfigParamInput_RefineK,
1633
+ CaseConfigParamInput_Milvus_use_partition_key,
1310
1634
  ]
1311
1635
 
1312
1636
  WeaviateLoadConfig = [
@@ -1319,11 +1643,25 @@ WeaviatePerformanceConfig = [
1319
1643
  CaseConfigParamInput_EF_Weaviate,
1320
1644
  ]
1321
1645
 
1322
- ESLoadingConfig = [CaseConfigParamInput_EFConstruction_ES, CaseConfigParamInput_M_ES]
1646
+ ESLoadingConfig = [
1647
+ CaseConfigParamInput_IndexType_ES,
1648
+ CaseConfigParamInput_NumShards_ES,
1649
+ CaseConfigParamInput_NumReplica_ES,
1650
+ CaseConfigParamInput_RefreshInterval_ES,
1651
+ CaseConfigParamInput_EFConstruction_ES,
1652
+ CaseConfigParamInput_M_ES,
1653
+ ]
1323
1654
  ESPerformanceConfig = [
1655
+ CaseConfigParamInput_IndexType_ES,
1656
+ CaseConfigParamInput_NumShards_ES,
1657
+ CaseConfigParamInput_NumReplica_ES,
1658
+ CaseConfigParamInput_RefreshInterval_ES,
1324
1659
  CaseConfigParamInput_EFConstruction_ES,
1325
1660
  CaseConfigParamInput_M_ES,
1326
1661
  CaseConfigParamInput_NumCandidates_ES,
1662
+ CaseConfigParamInput_UseRescore_ES,
1663
+ CaseConfigParamInput_OversampleRatio_ES,
1664
+ CaseConfigParamInput_UseRouting_ES,
1327
1665
  ]
1328
1666
 
1329
1667
  AWSOpensearchLoadingConfig = [
@@ -1612,10 +1950,37 @@ LanceDBLoadConfig = [
1612
1950
 
1613
1951
  LanceDBPerformanceConfig = LanceDBLoadConfig
1614
1952
 
1953
+ AWSOpensearchLoadingConfig = [
1954
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1955
+ CaseConfigParamInput_M_AWSOpensearch,
1956
+ CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1957
+ CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1958
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1959
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1960
+ CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1961
+ CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
1962
+ CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
1963
+ ]
1964
+
1965
+ AWSOpenSearchPerformanceConfig = [
1966
+ CaseConfigParamInput_EFConstruction_AWSOpensearch,
1967
+ CaseConfigParamInput_M_AWSOpensearch,
1968
+ CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
1969
+ CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
1970
+ CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
1971
+ CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
1972
+ CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
1973
+ CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
1974
+ CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
1975
+ CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
1976
+ ]
1977
+
1978
+ # Map DB to config
1615
1979
  CASE_CONFIG_MAP = {
1616
1980
  DB.Milvus: {
1617
1981
  CaseLabel.Load: MilvusLoadConfig,
1618
1982
  CaseLabel.Performance: MilvusPerformanceConfig,
1983
+ CaseLabel.Streaming: MilvusPerformanceConfig,
1619
1984
  },
1620
1985
  DB.ZillizCloud: {
1621
1986
  CaseLabel.Performance: ZillizCloudPerformanceConfig,
@@ -1676,4 +2041,18 @@ CASE_CONFIG_MAP = {
1676
2041
  CaseLabel.Load: LanceDBLoadConfig,
1677
2042
  CaseLabel.Performance: LanceDBPerformanceConfig,
1678
2043
  },
2044
+ DB.AWSOpenSearch: {
2045
+ CaseLabel.Load: AWSOpensearchLoadingConfig,
2046
+ CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
2047
+ },
1679
2048
  }
2049
+
2050
+
2051
+ def get_case_config_inputs(db: DB, case_label: CaseLabel) -> list[CaseConfigInput]:
2052
+ if db not in CASE_CONFIG_MAP:
2053
+ return []
2054
+ if case_label == CaseLabel.Load:
2055
+ return CASE_CONFIG_MAP[db][CaseLabel.Load]
2056
+ elif case_label == CaseLabel.Performance or case_label == CaseLabel.Streaming:
2057
+ return CASE_CONFIG_MAP[db][CaseLabel.Performance]
2058
+ return []