PyPI - vectordb-bench - Versions diffs - 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

vectordb_bench/__init__.py +14 -27
vectordb_bench/backend/assembler.py +19 -6
vectordb_bench/backend/cases.py +186 -23
vectordb_bench/backend/clients/__init__.py +32 -0
vectordb_bench/backend/clients/api.py +22 -1
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
vectordb_bench/backend/clients/chroma/chroma.py +6 -2
vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
vectordb_bench/backend/clients/lancedb/cli.py +62 -8
vectordb_bench/backend/clients/lancedb/config.py +14 -1
vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
vectordb_bench/backend/clients/milvus/cli.py +30 -9
vectordb_bench/backend/clients/milvus/config.py +3 -0
vectordb_bench/backend/clients/milvus/milvus.py +81 -23
vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
vectordb_bench/backend/clients/oceanbase/config.py +125 -0
vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
vectordb_bench/backend/dataset.py +143 -27
vectordb_bench/backend/filter.py +76 -0
vectordb_bench/backend/runner/__init__.py +3 -3
vectordb_bench/backend/runner/mp_runner.py +52 -39
vectordb_bench/backend/runner/rate_runner.py +68 -52
vectordb_bench/backend/runner/read_write_runner.py +125 -68
vectordb_bench/backend/runner/serial_runner.py +56 -23
vectordb_bench/backend/task_runner.py +48 -20
vectordb_bench/cli/batch_cli.py +121 -0
vectordb_bench/cli/cli.py +59 -1
vectordb_bench/cli/vectordbbench.py +7 -0
vectordb_bench/config-files/batch_sample_config.yml +17 -0
vectordb_bench/frontend/components/check_results/data.py +16 -11
vectordb_bench/frontend/components/check_results/filters.py +53 -25
vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
vectordb_bench/frontend/components/check_results/nav.py +20 -0
vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
vectordb_bench/frontend/components/label_filter/charts.py +60 -0
vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
vectordb_bench/frontend/components/streaming/charts.py +253 -0
vectordb_bench/frontend/components/streaming/data.py +62 -0
vectordb_bench/frontend/components/tables/data.py +1 -1
vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
vectordb_bench/frontend/config/styles.py +32 -2
vectordb_bench/frontend/pages/concurrent.py +5 -1
vectordb_bench/frontend/pages/custom.py +4 -0
vectordb_bench/frontend/pages/label_filter.py +56 -0
vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
vectordb_bench/frontend/pages/results.py +60 -0
vectordb_bench/frontend/pages/run_test.py +3 -3
vectordb_bench/frontend/pages/streaming.py +135 -0
vectordb_bench/frontend/pages/tables.py +4 -0
vectordb_bench/frontend/vdb_benchmark.py +16 -41
vectordb_bench/interface.py +6 -2
vectordb_bench/metric.py +15 -1
vectordb_bench/models.py +38 -11
vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
vectordb_bench/results/dbPrices.json +12 -4
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0

vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED Viewed

@@ -4,6 +4,7 @@ from pydantic import BaseModel
 from vectordb_bench.backend.cases import CaseLabel, CaseType
 from vectordb_bench.backend.clients import DB
 from vectordb_bench.backend.clients.api import IndexType, MetricType, SQType
+from vectordb_bench.backend.dataset import DatasetWithSizeType
 from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
 from vectordb_bench.models import CaseConfig, CaseConfigParamType
@@ -23,32 +24,58 @@ class BatchCaseConfig(BaseModel):
     cases: list[CaseConfig] = []
+class InputType(IntEnum):
+    Text = 20001
+    Number = 20002
+    Option = 20003
+    Float = 20004
+    Bool = 20005
+class ConfigInput(BaseModel):
+    label: CaseConfigParamType
+    inputType: InputType = InputType.Text
+    inputConfig: dict = {}
+    inputHelp: str = ""
+    displayLabel: str = ""
+class CaseConfigInput(ConfigInput):
+    # todo type should be a function
+    isDisplayed: typing.Any = lambda config: True
 class UICaseItem(BaseModel):
     isLine: bool = False
+    key: str = ""
     label: str = ""
     description: str = ""
     cases: list[CaseConfig] = []
     caseLabel: CaseLabel = CaseLabel.Performance
+    extra_custom_case_config_inputs: list[ConfigInput] = []
+    tmp_custom_config: dict = dict()
     def __init__(
         self,
         isLine: bool = False,
-        case_id: CaseType | None = None,
-        custom_case: dict | None = None,
-        cases: list[CaseConfig] | None = None,
+        cases: list[CaseConfig] = None,
         label: str = "",
         description: str = "",
         caseLabel: CaseLabel = CaseLabel.Performance,
+        **kwargs,
     ):
         if isLine is True:
-            super().__init__(isLine=True)
-        elif case_id is not None and isinstance(case_id, CaseType):
-            c = case_id.case_cls(custom_case)
+            super().__init__(isLine=True, **kwargs)
+        if cases is None:
+            cases = []
+        elif len(cases) == 1:
+            c = cases[0].case
             super().__init__(
-                label=c.name,
-                description=c.description,
-                cases=[CaseConfig(case_id=case_id, custom_case=custom_case)],
-                caseLabel=c.label,
+                label=label if label else c.name,
+                description=description if description else c.description,
+                cases=cases,
+                caseLabel=caseLabel,
+                **kwargs,
             )
         else:
             super().__init__(
@@ -56,10 +83,26 @@ class UICaseItem(BaseModel):
                 description=description,
                 cases=cases,
                 caseLabel=caseLabel,
+                **kwargs,
             )
     def __hash__(self) -> int:
-        return hash(self.json())
+        return hash(self.key if self.key else self.label)
+    def get_cases(self) -> list[CaseConfig]:
+        # return self.cases
+        if len(self.extra_custom_case_config_inputs) == 0:
+            return self.cases
+        cases = [
+            CaseConfig(
+                case_id=c.case_id,
+                k=c.k,
+                concurrency_search_config=c.concurrency_search_config,
+                custom_case={**c.custom_case, **self.tmp_custom_config},
+            )
+            for c in self.cases
+        ]
+        return cases
 class UICaseItemCluster(BaseModel):
@@ -70,47 +113,181 @@ class UICaseItemCluster(BaseModel):
 def get_custom_case_items() -> list[UICaseItem]:
     custom_configs = get_custom_configs()
     return [
-        UICaseItem(case_id=CaseType.PerformanceCustomDataset, custom_case=custom_config.dict())
+        UICaseItem(
+            label=f"{custom_config.dataset_config.name} - None Filter",
+            cases=[
+                CaseConfig(
+                    case_id=CaseType.PerformanceCustomDataset,
+                    custom_case={
+                        **custom_config.dict(),
+                        "use_filter": False,
+                    },
+                )
+            ],
+        )
+        for custom_config in custom_configs
+    ] + [
+        UICaseItem(
+            label=f"{custom_config.dataset_config.name} - Filter",
+            description=(
+                f'[Batch Cases] This case evaluate search performance under filtering constraints like "color==red."'
+                f"Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
+                f"such as [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5]. "
+                f"Essentially, vdbbench will test each filter label in your own dataset to"
+                " assess the vector database's search performance across different filtering conditions."
+            ),
+            cases=[
+                CaseConfig(
+                    case_id=CaseType.PerformanceCustomDataset,
+                    custom_case={
+                        **custom_config.dict(),
+                        "use_filter": True,
+                        "label_percentage": label_percentage,
+                    },
+                )
+                for label_percentage in custom_config.dataset_config.label_percentages
+            ],
+        )
         for custom_config in custom_configs
+        if custom_config.dataset_config.label_percentages
     ]
+def generate_normal_cases(case_id: CaseType, custom_case: dict | None = None) -> list[CaseConfig]:
+    return [CaseConfig(case_id=case_id, custom_case=custom_case)]
 def get_custom_case_cluter() -> UICaseItemCluster:
     return UICaseItemCluster(label="Custom Search Performance Test", uiCaseItems=get_custom_case_items())
+def generate_custom_streaming_case() -> CaseConfig:
+    return CaseConfig(
+        case_id=CaseType.StreamingPerformanceCase,
+        custom_case=dict(),
+    )
+custom_streaming_config: list[ConfigInput] = [
+    ConfigInput(
+        label=CaseConfigParamType.dataset_with_size_type,
+        displayLabel="dataset",
+        inputType=InputType.Option,
+        inputConfig=dict(options=[dataset.value for dataset in DatasetWithSizeType]),
+    ),
+    ConfigInput(
+        label=CaseConfigParamType.insert_rate,
+        inputType=InputType.Number,
+        inputConfig=dict(step=100, min=100, max=4_000, value=200),
+        inputHelp="fixed insertion rate (rows/s), must be divisible by 100",
+    ),
+    ConfigInput(
+        label=CaseConfigParamType.search_stages,
+        inputType=InputType.Text,
+        inputConfig=dict(value="[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]"),
+        inputHelp="0<=stage<1.0; do search test when inserting a specified amount of data.",
+    ),
+    ConfigInput(
+        label=CaseConfigParamType.concurrencies,
+        inputType=InputType.Text,
+        inputConfig=dict(value="[5, 10, 20]"),
+        inputHelp="concurrent num of search test while insertion; record max-qps.",
+    ),
+    ConfigInput(
+        label=CaseConfigParamType.optimize_after_write,
+        inputType=InputType.Option,
+        inputConfig=dict(options=[True, False]),
+        inputHelp="whether to optimize after inserting all data",
+    ),
+    ConfigInput(
+        label=CaseConfigParamType.read_dur_after_write,
+        inputType=InputType.Number,
+        inputConfig=dict(step=10, min=30, max=360_000, value=30),
+        inputHelp="search test duration after inserting all data",
+    ),
+]
+def generate_label_filter_cases(dataset_with_size_type: DatasetWithSizeType) -> list[CaseConfig]:
+    label_percentages = dataset_with_size_type.get_manager().data.scalar_label_percentages
+    return [
+        CaseConfig(
+            case_id=CaseType.LabelFilterPerformanceCase,
+            custom_case=dict(dataset_with_size_type=dataset_with_size_type, label_percentage=label_percentage),
+        )
+        for label_percentage in label_percentages
+    ]
 UI_CASE_CLUSTERS: list[UICaseItemCluster] = [
     UICaseItemCluster(
         label="Search Performance Test",
         uiCaseItems=[
-            UICaseItem(case_id=CaseType.Performance768D100M),
-            UICaseItem(case_id=CaseType.Performance768D10M),
-            UICaseItem(case_id=CaseType.Performance768D1M),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D100M)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M)),
             UICaseItem(isLine=True),
-            UICaseItem(case_id=CaseType.Performance1536D5M),
-            UICaseItem(case_id=CaseType.Performance1536D500K),
-            UICaseItem(case_id=CaseType.Performance1536D50K),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D1M)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1024D10M)),
+            UICaseItem(isLine=True),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D50K)),
         ],
     ),
     UICaseItemCluster(
-        label="Filter Search Performance Test",
+        label="Int-Filter Search Performance Test",
         uiCaseItems=[
-            UICaseItem(case_id=CaseType.Performance768D10M1P),
-            UICaseItem(case_id=CaseType.Performance768D10M99P),
-            UICaseItem(case_id=CaseType.Performance768D1M1P),
-            UICaseItem(case_id=CaseType.Performance768D1M99P),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M1P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D10M99P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M1P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance768D1M99P)),
             UICaseItem(isLine=True),
-            UICaseItem(case_id=CaseType.Performance1536D5M1P),
-            UICaseItem(case_id=CaseType.Performance1536D5M99P),
-            UICaseItem(case_id=CaseType.Performance1536D500K1P),
-            UICaseItem(case_id=CaseType.Performance1536D500K99P),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M1P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D5M99P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K1P)),
+            UICaseItem(cases=generate_normal_cases(CaseType.Performance1536D500K99P)),
+        ],
+    ),
+    UICaseItemCluster(
+        label="Label-Filter Search Performance Test",
+        uiCaseItems=[
+            UICaseItem(
+                label=f"Label-Filter Search Performance Test - {dataset_with_size_type.value}",
+                description=(
+                    f'[Batch Cases] These cases evaluate search performance under filtering constraints like "color==red." '
+                    "Vdbbench provides an additional column of randomly distributed labels with fixed proportions, "
+                    f"such as {dataset_with_size_type.get_manager().data.scalar_label_percentages}. "
+                    f"Essentially, vdbbench will test each filter label in {dataset_with_size_type.value} to "
+                    "assess the vector database's search performance across different filtering conditions. "
+                ),
+                cases=generate_label_filter_cases(dataset_with_size_type),
+            )
+            for dataset_with_size_type in DatasetWithSizeType
         ],
     ),
     UICaseItemCluster(
         label="Capacity Test",
         uiCaseItems=[
-            UICaseItem(case_id=CaseType.CapacityDim960),
-            UICaseItem(case_id=CaseType.CapacityDim128),
+            UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim960)),
+            UICaseItem(cases=generate_normal_cases(CaseType.CapacityDim128)),
+        ],
+    ),
+    UICaseItemCluster(
+        label="Streaming Test",
+        uiCaseItems=[
+            UICaseItem(
+                label="Customize Streaming Test",
+                description=(
+                    "This case test the search performance during insertion. "
+                    "VDBB will send insert requests to VectorDB at a fixed rate and "
+                    "conduct a search test once the insert count reaches the search_stages. "
+                    "After all data is inserted, optimization and search tests can be "
+                    "optionally performed."
+                ),
+                cases=[generate_custom_streaming_case()],
+                extra_custom_case_config_inputs=custom_streaming_config,
+            )
         ],
     ),
 ]
@@ -123,14 +300,8 @@ DISPLAY_CASE_ORDER: list[CaseType] = [
     CaseType.Performance1536D5M,
     CaseType.Performance1536D500K,
     CaseType.Performance1536D50K,
-    CaseType.Performance768D10M1P,
-    CaseType.Performance768D1M1P,
-    CaseType.Performance1536D5M1P,
-    CaseType.Performance1536D500K1P,
-    CaseType.Performance768D10M99P,
-    CaseType.Performance768D1M99P,
-    CaseType.Performance1536D5M99P,
-    CaseType.Performance1536D500K99P,
+    CaseType.Performance1024D1M,
+    CaseType.Performance1024D10M,
     CaseType.CapacityDim960,
     CaseType.CapacityDim128,
 ]
@@ -146,6 +317,7 @@ class InputType(IntEnum):
     Option = 20003
     Float = 20004
     Bool = 20005
+    Select = 20006
 class CaseConfigInput(BaseModel):
@@ -454,7 +626,7 @@ CaseConfigParamInput_EFConstruction_ES = CaseConfigInput(
     inputConfig={
         "min": 8,
         "max": 512,
-        "value": 360,
+        "value": 128,
     },
 )
@@ -482,7 +654,7 @@ CaseConfigParamInput_EF_SEARCH_AWSOpensearch = CaseConfigInput(
     label=CaseConfigParamType.ef_search,
     inputType=InputType.Number,
     inputConfig={
-        "min": 100,
+        "min": 1,
         "max": 1024,
         "value": 256,
     },
@@ -556,6 +728,67 @@ CaseConfigParamInput_EFConstruction_PgVector = CaseConfigInput(
     isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
 )
+CaseConfigParamInput_IndexType_ES = CaseConfigInput(
+    label=CaseConfigParamType.IndexType,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": [
+            IndexType.ES_HNSW.value,
+            IndexType.ES_HNSW_INT8.value,
+            IndexType.ES_HNSW_INT4.value,
+            IndexType.ES_HNSW_BBQ.value,
+        ],
+    },
+)
+CaseConfigParamInput_NumShards_ES = CaseConfigInput(
+    label=CaseConfigParamType.number_of_shards,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 1,
+        "max": 128,
+        "value": 1,
+    },
+)
+CaseConfigParamInput_NumReplica_ES = CaseConfigInput(
+    label=CaseConfigParamType.number_of_replicas,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 0,
+        "max": 10,
+        "value": 0,
+    },
+)
+CaseConfigParamInput_RefreshInterval_ES = CaseConfigInput(
+    label=CaseConfigParamType.refresh_interval,
+    inputType=InputType.Text,
+    inputConfig={"value": "30s"},
+)
+CaseConfigParamInput_UseRescore_ES = CaseConfigInput(
+    label=CaseConfigParamType.use_rescore,
+    inputType=InputType.Bool,
+    inputConfig={"value": False},
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) != IndexType.ES_HNSW.value,
+)
+CaseConfigParamInput_OversampleRatio_ES = CaseConfigInput(
+    label=CaseConfigParamType.oversample_ratio,
+    inputType=InputType.Float,
+    inputConfig={"min": 1.0, "max": 100.0, "value": 2.0},
+    isDisplayed=lambda config: config.get(CaseConfigParamType.use_rescore, False),
+    inputHelp="num_oversample = oversample_ratio * top_k.",
+)
+CaseConfigParamInput_UseRouting_ES = CaseConfigInput(
+    label=CaseConfigParamType.use_routing,
+    inputType=InputType.Bool,
+    inputConfig={"value": False},
+    inputHelp="Using routing to improve label-filter case performance",
+)
 CaseConfigParamInput_M_ES = CaseConfigInput(
     label=CaseConfigParamType.M,
@@ -563,10 +796,11 @@ CaseConfigParamInput_M_ES = CaseConfigInput(
     inputConfig={
         "min": 4,
         "max": 64,
-        "value": 30,
+        "value": 16,
     },
 )
 CaseConfigParamInput_NumCandidates_ES = CaseConfigInput(
     label=CaseConfigParamType.numCandidates,
     inputType=InputType.Number,
@@ -1203,6 +1437,13 @@ CaseConfigParamInput_CacheSize_MariaDB = CaseConfigInput(
     },
     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None) == IndexType.HNSW.value,
 )
+CaseConfigParamInput_Milvus_use_partition_key = CaseConfigInput(
+    label=CaseConfigParamType.use_partition_key,
+    inputType=InputType.Option,
+    inputHelp="whether to use partition_key for label-filter cases. only works in label-filter cases",
+    inputConfig={"options": [True, False]},
+)
 CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
     label=CaseConfigParamType.mongodb_quantization_type,
@@ -1264,6 +1505,87 @@ CaseConfigParamInput_EFConstruction_Vespa = CaseConfigInput(
     isDisplayed=lambda config: config[CaseConfigParamType.IndexType] == IndexType.HNSW.value,
 )
+CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.index_thread_qty_during_force_merge,
+    displayLabel="Index Thread Qty During Force Merge",
+    inputHelp="Thread count during force merge operations",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 1,
+        "max": 32,
+        "value": 4,
+    },
+)
+CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.number_of_indexing_clients,
+    displayLabel="Number of Indexing Clients",
+    inputHelp="Number of concurrent clients for data insertion",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 1,
+        "max": 32,
+        "value": 1,
+    },
+)
+CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.number_of_shards,
+    displayLabel="Number of Shards",
+    inputHelp="Number of primary shards for the index",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 1,
+        "max": 32,
+        "value": 1,
+    },
+)
+CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.number_of_replicas,
+    displayLabel="Number of Replicas",
+    inputHelp="Number of replica copies for each primary shard",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 0,
+        "max": 10,
+        "value": 1,
+    },
+)
+CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.index_thread_qty,
+    displayLabel="Index Thread Qty",
+    inputHelp="Thread count for native engine indexing",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 1,
+        "max": 32,
+        "value": 4,
+    },
+)
+CaseConfigParamInput_ENGINE_NAME_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.engine_name,
+    displayLabel="Engine",
+    inputHelp="HNSW algorithm implementation to use",
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["faiss", "nmslib", "lucene"],
+        "default": "faiss",
+    },
+)
+CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch = CaseConfigInput(
+    label=CaseConfigParamType.metric_type_name,
+    displayLabel="Metric Type",
+    inputHelp="Distance metric type for vector similarity",
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["l2", "cosine", "ip"],
+        "default": "l2",
+    },
+)
 MilvusLoadConfig = [
     CaseConfigParamInput_IndexType,
@@ -1280,6 +1602,7 @@ MilvusLoadConfig = [
     CaseConfigParamInput_Refine,
     CaseConfigParamInput_RefineType,
     CaseConfigParamInput_NRQ,
+    CaseConfigParamInput_Milvus_use_partition_key,
 ]
 MilvusPerformanceConfig = [
     CaseConfigParamInput_IndexType,
@@ -1307,6 +1630,7 @@ MilvusPerformanceConfig = [
     CaseConfigParamInput_Refine,
     CaseConfigParamInput_RefineType,
     CaseConfigParamInput_RefineK,
+    CaseConfigParamInput_Milvus_use_partition_key,
 ]
 WeaviateLoadConfig = [
@@ -1319,11 +1643,25 @@ WeaviatePerformanceConfig = [
     CaseConfigParamInput_EF_Weaviate,
 ]
-ESLoadingConfig = [CaseConfigParamInput_EFConstruction_ES, CaseConfigParamInput_M_ES]
+ESLoadingConfig = [
+    CaseConfigParamInput_IndexType_ES,
+    CaseConfigParamInput_NumShards_ES,
+    CaseConfigParamInput_NumReplica_ES,
+    CaseConfigParamInput_RefreshInterval_ES,
+    CaseConfigParamInput_EFConstruction_ES,
+    CaseConfigParamInput_M_ES,
+]
 ESPerformanceConfig = [
+    CaseConfigParamInput_IndexType_ES,
+    CaseConfigParamInput_NumShards_ES,
+    CaseConfigParamInput_NumReplica_ES,
+    CaseConfigParamInput_RefreshInterval_ES,
     CaseConfigParamInput_EFConstruction_ES,
     CaseConfigParamInput_M_ES,
     CaseConfigParamInput_NumCandidates_ES,
+    CaseConfigParamInput_UseRescore_ES,
+    CaseConfigParamInput_OversampleRatio_ES,
+    CaseConfigParamInput_UseRouting_ES,
 ]
 AWSOpensearchLoadingConfig = [
@@ -1612,10 +1950,37 @@ LanceDBLoadConfig = [
 LanceDBPerformanceConfig = LanceDBLoadConfig
+AWSOpensearchLoadingConfig = [
+    CaseConfigParamInput_EFConstruction_AWSOpensearch,
+    CaseConfigParamInput_M_AWSOpensearch,
+    CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
+    CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
+    CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
+    CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
+]
+AWSOpenSearchPerformanceConfig = [
+    CaseConfigParamInput_EFConstruction_AWSOpensearch,
+    CaseConfigParamInput_M_AWSOpensearch,
+    CaseConfigParamInput_EF_SEARCH_AWSOpensearch,
+    CaseConfigParamInput_ENGINE_NAME_AWSOpensearch,
+    CaseConfigParamInput_METRIC_TYPE_NAME_AWSOpensearch,
+    CaseConfigParamInput_INDEX_THREAD_QTY_DURING_FORCE_MERGE_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_INDEXING_CLIENTS_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_SHARDS_AWSOpensearch,
+    CaseConfigParamInput_NUMBER_OF_REPLICAS_AWSOpensearch,
+    CaseConfigParamInput_INDEX_THREAD_QTY_AWSOpensearch,
+]
+# Map DB to config
 CASE_CONFIG_MAP = {
     DB.Milvus: {
         CaseLabel.Load: MilvusLoadConfig,
         CaseLabel.Performance: MilvusPerformanceConfig,
+        CaseLabel.Streaming: MilvusPerformanceConfig,
     },
     DB.ZillizCloud: {
         CaseLabel.Performance: ZillizCloudPerformanceConfig,
@@ -1676,4 +2041,18 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: LanceDBLoadConfig,
         CaseLabel.Performance: LanceDBPerformanceConfig,
     },
+    DB.AWSOpenSearch: {
+        CaseLabel.Load: AWSOpensearchLoadingConfig,
+        CaseLabel.Performance: AWSOpenSearchPerformanceConfig,
+    },
 }
+def get_case_config_inputs(db: DB, case_label: CaseLabel) -> list[CaseConfigInput]:
+    if db not in CASE_CONFIG_MAP:
+        return []
+    if case_label == CaseLabel.Load:
+        return CASE_CONFIG_MAP[db][CaseLabel.Load]
+    elif case_label == CaseLabel.Performance or case_label == CaseLabel.Streaming:
+        return CASE_CONFIG_MAP[db][CaseLabel.Performance]
+    return []

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl