PyPI - vectordb-bench - Versions diffs - 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

vectordb_bench/__init__.py +14 -27
vectordb_bench/backend/assembler.py +19 -6
vectordb_bench/backend/cases.py +186 -23
vectordb_bench/backend/clients/__init__.py +32 -0
vectordb_bench/backend/clients/api.py +22 -1
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
vectordb_bench/backend/clients/chroma/chroma.py +6 -2
vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
vectordb_bench/backend/clients/lancedb/cli.py +62 -8
vectordb_bench/backend/clients/lancedb/config.py +14 -1
vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
vectordb_bench/backend/clients/milvus/cli.py +30 -9
vectordb_bench/backend/clients/milvus/config.py +3 -0
vectordb_bench/backend/clients/milvus/milvus.py +81 -23
vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
vectordb_bench/backend/clients/oceanbase/config.py +125 -0
vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
vectordb_bench/backend/dataset.py +143 -27
vectordb_bench/backend/filter.py +76 -0
vectordb_bench/backend/runner/__init__.py +3 -3
vectordb_bench/backend/runner/mp_runner.py +52 -39
vectordb_bench/backend/runner/rate_runner.py +68 -52
vectordb_bench/backend/runner/read_write_runner.py +125 -68
vectordb_bench/backend/runner/serial_runner.py +56 -23
vectordb_bench/backend/task_runner.py +48 -20
vectordb_bench/cli/batch_cli.py +121 -0
vectordb_bench/cli/cli.py +59 -1
vectordb_bench/cli/vectordbbench.py +7 -0
vectordb_bench/config-files/batch_sample_config.yml +17 -0
vectordb_bench/frontend/components/check_results/data.py +16 -11
vectordb_bench/frontend/components/check_results/filters.py +53 -25
vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
vectordb_bench/frontend/components/check_results/nav.py +20 -0
vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
vectordb_bench/frontend/components/label_filter/charts.py +60 -0
vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
vectordb_bench/frontend/components/streaming/charts.py +253 -0
vectordb_bench/frontend/components/streaming/data.py +62 -0
vectordb_bench/frontend/components/tables/data.py +1 -1
vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
vectordb_bench/frontend/config/styles.py +32 -2
vectordb_bench/frontend/pages/concurrent.py +5 -1
vectordb_bench/frontend/pages/custom.py +4 -0
vectordb_bench/frontend/pages/label_filter.py +56 -0
vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
vectordb_bench/frontend/pages/results.py +60 -0
vectordb_bench/frontend/pages/run_test.py +3 -3
vectordb_bench/frontend/pages/streaming.py +135 -0
vectordb_bench/frontend/pages/tables.py +4 -0
vectordb_bench/frontend/vdb_benchmark.py +16 -41
vectordb_bench/interface.py +6 -2
vectordb_bench/metric.py +15 -1
vectordb_bench/models.py +38 -11
vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
vectordb_bench/results/dbPrices.json +12 -4
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
{vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0

vectordb_bench/frontend/components/check_results/nav.py CHANGED Viewed

@@ -20,3 +20,23 @@ def NavToResults(st, key="nav-to-results"):
     navClick = st.button("< &nbsp;&nbsp;Back to Results", key=key)
     if navClick:
         switch_page("vdb benchmark")
+def NavToPages(st):
+    options = [
+        {"name": "Run Test", "link": "run_test"},
+        {"name": "Results", "link": "results"},
+        {"name": "Quries Per Dollar", "link": "quries_per_dollar"},
+        {"name": "Concurrent", "link": "concurrent"},
+        {"name": "Label Filter", "link": "label_filter"},
+        {"name": "Streaming", "link": "streaming"},
+        {"name": "Tables", "link": "tables"},
+        {"name": "Custom Dataset", "link": "custom"},
+    ]
+    html = ""
+    for i, option in enumerate(options):
+        html += f'<a href="/{option["link"]}" target="_self" style="text-decoration: none; padding: 0.1px 0.2px;">{option["name"]}</a>'
+        if i < len(options) - 1:
+            html += '<span style="color: #888; margin: 0 5px;">|</span>'
+    st.markdown(html, unsafe_allow_html=True)

vectordb_bench/frontend/components/custom/displayCustomCase.py CHANGED Viewed

@@ -12,7 +12,7 @@ def displayCustomCase(customCase: CustomCaseConfig, st, key):
         "Folder Path", key=f"{key}_dir", value=customCase.dataset_config.dir
     )
-    columns = st.columns(4)
+    columns = st.columns(3)
     customCase.dataset_config.dim = columns[0].number_input(
         "dim", key=f"{key}_dim", value=customCase.dataset_config.dim
     )
@@ -22,16 +22,51 @@ def displayCustomCase(customCase: CustomCaseConfig, st, key):
     customCase.dataset_config.metric_type = columns[2].selectbox(
         "metric type", key=f"{key}_metric_type", options=["L2", "Cosine", "IP"]
     )
-    customCase.dataset_config.file_count = columns[3].number_input(
-        "train file count", key=f"{key}_file_count", value=customCase.dataset_config.file_count
+    columns = st.columns(3)
+    customCase.dataset_config.train_name = columns[0].text_input(
+        "train file name",
+        key=f"{key}_train_name",
+        value=customCase.dataset_config.train_name,
+    )
+    customCase.dataset_config.test_name = columns[1].text_input(
+        "test file name", key=f"{key}_test_name", value=customCase.dataset_config.test_name
+    )
+    customCase.dataset_config.gt_name = columns[2].text_input(
+        "ground truth file name", key=f"{key}_gt_name", value=customCase.dataset_config.gt_name
+    )
+    columns = st.columns([1, 1, 2, 2])
+    customCase.dataset_config.train_id_name = columns[0].text_input(
+        "train id name", key=f"{key}_train_id_name", value=customCase.dataset_config.train_id_name
+    )
+    customCase.dataset_config.train_col_name = columns[1].text_input(
+        "train emb name", key=f"{key}_train_col_name", value=customCase.dataset_config.train_col_name
+    )
+    customCase.dataset_config.test_col_name = columns[2].text_input(
+        "test emb name", key=f"{key}_test_col_name", value=customCase.dataset_config.test_col_name
+    )
+    customCase.dataset_config.gt_col_name = columns[3].text_input(
+        "ground truth emb name", key=f"{key}_gt_col_name", value=customCase.dataset_config.gt_col_name
     )
-    columns = st.columns(4)
-    customCase.dataset_config.use_shuffled = columns[0].checkbox(
-        "use shuffled data", key=f"{key}_use_shuffled", value=customCase.dataset_config.use_shuffled
+    columns = st.columns(2)
+    customCase.dataset_config.scalar_labels_name = columns[0].text_input(
+        "scalar labels file name",
+        key=f"{key}_scalar_labels_file_name",
+        value=customCase.dataset_config.scalar_labels_name,
     )
-    customCase.dataset_config.with_gt = columns[1].checkbox(
-        "with groundtruth", key=f"{key}_with_gt", value=customCase.dataset_config.with_gt
+    default_label_percentages = ",".join(map(str, customCase.dataset_config.with_label_percentages))
+    label_percentage_input = columns[1].text_input(
+        "label percentages",
+        key=f"{key}_label_percantages",
+        value=default_label_percentages,
     )
+    try:
+        customCase.dataset_config.label_percentages = [
+            float(item.strip()) for item in label_percentage_input.split(",") if item.strip()
+        ]
+    except ValueError as e:
+        st.write(f"<span style='color:red'>{e},please input correct number</span>", unsafe_allow_html=True)
     customCase.description = st.text_area("description", key=f"{key}_description", value=customCase.description)

vectordb_bench/frontend/components/custom/displaypPrams.py CHANGED Viewed

@@ -2,13 +2,18 @@ def displayParams(st):
     st.markdown(
         """
 - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
-  - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
-  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
-  - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
+  - Vectors data files: The file should have two kinds of columns: `id` as an incrementing `int` and `emb` as an array of `float32`. The name of two columns could be defined on your own.
+  - Query test vectors: The file could be named on your own and should have two kinds of columns: `id` as an incrementing `int` and `emb` as an array of `float32`. The `id` column must be named as `id`, and `emb` column could be defined on your own.
+  - Ground truth file: The file could be named on your own and should have two kinds of columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`. The `id` column must be named as `id`, and `neighbors_id` column could be defined on your own.
-- `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
+- `Train File Name` - If the number of train file is `more than one`, please input all your train file name and `split with ','` without the `.parquet` file extensionthe. For example, if there are two train file and the name of them are `train1.parquet` and `train2.parquet`, then input `train1,train2`.
+- `Ground Truth Emb Name` - No matter whether filter file is applied or not, the `neighbors_id` column in ground truth file must have the same name.
+- `Scalar Labels File Name ` - If there is a scalar labels file, please input the filename without the .parquet extension. The file should have two columns: `id` as an incrementing `int` and `labels` as an array of `string`. The `id` column must correspond one-to-one with the `id` column in train file..
+- `Label percentages` - If you have filter file, please input label percentage you want to real run and `split with ','` when it's `more than one`. If you `don't have` filter file, than `keep the text vacant.`
-- `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
 """
     )
     st.caption(

vectordb_bench/frontend/components/custom/getCustomConfig.py CHANGED Viewed

@@ -14,6 +14,16 @@ class CustomDatasetConfig(BaseModel):
     file_count: int = 1
     use_shuffled: bool = False
     with_gt: bool = True
+    train_name: str = "train"
+    test_name: str = "test"
+    gt_name: str = "neighbors"
+    train_id_name: str = "id"
+    train_col_name: str = "emb"
+    test_col_name: str = "emb"
+    gt_col_name: str = "neighbors_id"
+    scalar_labels_name: str = "scalar_labels"
+    label_percentages: list[str] = []
+    with_label_percentages: list[float] = [0.001, 0.02, 0.5]
 class CustomCaseConfig(BaseModel):

vectordb_bench/frontend/components/label_filter/charts.py ADDED Viewed

@@ -0,0 +1,60 @@
+import plotly.express as px
+from vectordb_bench.metric import metric_unit_map
+def drawCharts(st, allData, **kwargs):
+    dataset_names = list(set([data["dataset_name"] for data in allData]))
+    dataset_names.sort()
+    for dataset_name in dataset_names:
+        container = st.container()
+        container.subheader(dataset_name)
+        data = [d for d in allData if d["dataset_name"] == dataset_name]
+        drawChartByMetric(container, data, **kwargs)
+def drawChartByMetric(st, data, metrics=("qps", "recall"), **kwargs):
+    columns = st.columns(len(metrics))
+    for i, metric in enumerate(metrics):
+        container = columns[i]
+        container.markdown(f"#### {metric}")
+        drawChart(container, data, metric)
+def getRange(metric, data, padding_multipliers):
+    minV = min([d.get(metric, 0) for d in data])
+    maxV = max([d.get(metric, 0) for d in data])
+    padding = maxV - minV
+    rangeV = [
+        minV - padding * padding_multipliers[0],
+        maxV + padding * padding_multipliers[1],
+    ]
+    return rangeV
+def drawChart(st, data: list[object], metric):
+    unit = metric_unit_map.get(metric, "")
+    x = "filter_rate"
+    xrange = getRange(x, data, [0.05, 0.1])
+    y = metric
+    yrange = getRange(y, data, [0.2, 0.1])
+    data.sort(key=lambda a: a[x])
+    fig = px.line(
+        data,
+        x=x,
+        y=y,
+        color="db_name",
+        line_group="db_name",
+        text=metric,
+        markers=True,
+    )
+    fig.update_xaxes(range=xrange)
+    fig.update_yaxes(range=yrange)
+    fig.update_traces(textposition="bottom right", texttemplate="%{y:,.4~r}" + unit)
+    fig.update_layout(
+        margin=dict(l=0, r=0, t=40, b=0, pad=8),
+        legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""),
+    )
+    st.plotly_chart(fig, use_container_width=True)

vectordb_bench/frontend/components/run_test/caseSelector.py CHANGED Viewed

@@ -1,8 +1,21 @@
-from vectordb_bench.frontend.config.styles import *
-from vectordb_bench.frontend.config.dbCaseConfigs import *
+from vectordb_bench.backend.clients import DB
+from vectordb_bench.frontend.components.run_test.inputWidget import inputWidget
 from collections import defaultdict
+from vectordb_bench.frontend.config.dbCaseConfigs import (
+    UI_CASE_CLUSTERS,
+    UICaseItem,
+    UICaseItemCluster,
+    get_case_config_inputs,
+    get_custom_case_cluter,
+)
+from vectordb_bench.frontend.config.styles import (
+    CASE_CONFIG_SETTING_COLUMNS,
+    CHECKBOX_INDENT,
+    DB_CASE_CONFIG_SETTING_COLUMNS,
+)
 from vectordb_bench.frontend.utils import addHorizontalLine
+from vectordb_bench.models import CaseConfig
 def caseSelector(st, activedDbList: list[DB]):
@@ -24,7 +37,7 @@ def caseSelector(st, activedDbList: list[DB]):
         activedCaseList += caseClusterExpander(st, caseCluster, dbToCaseClusterConfigs, activedDbList)
     for db in dbToCaseClusterConfigs:
         for uiCaseItem in dbToCaseClusterConfigs[db]:
-            for case in uiCaseItem.cases:
+            for case in uiCaseItem.get_cases():
                 dbToCaseConfigs[db][case] = dbToCaseClusterConfigs[db][uiCaseItem]
     return activedCaseList, dbToCaseConfigs
@@ -48,15 +61,38 @@ def caseItemCheckbox(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, actived
         unsafe_allow_html=True,
     )
+    caseConfigSetting(st.container(), uiCaseItem)
     if selected:
-        caseConfigSetting(st.container(), dbToCaseClusterConfigs, uiCaseItem, activedDbList)
+        dbCaseConfigSetting(st.container(), dbToCaseClusterConfigs, uiCaseItem, activedDbList)
+    return uiCaseItem.get_cases() if selected else []
+def caseConfigSetting(st, uiCaseItem: UICaseItem):
+    config_inputs = uiCaseItem.extra_custom_case_config_inputs
+    if len(config_inputs) == 0:
+        return
-    return uiCaseItem.cases if selected else []
+    columns = st.columns(
+        [
+            1,
+            *[DB_CASE_CONFIG_SETTING_COLUMNS / CASE_CONFIG_SETTING_COLUMNS] * CASE_CONFIG_SETTING_COLUMNS,
+        ]
+    )
+    columns[0].markdown(
+        f"<div style='margin: 0 0 24px {CHECKBOX_INDENT}px; font-size: 18px; font-weight: 600;'>Custom Config</div>",
+        unsafe_allow_html=True,
+    )
+    for i, config_input in enumerate(config_inputs):
+        column = columns[1 + i % CASE_CONFIG_SETTING_COLUMNS]
+        key = f"custom-config-{uiCaseItem.label}-{config_input.label.value}"
+        uiCaseItem.tmp_custom_config[config_input.label.value] = inputWidget(column, config=config_input, key=key)
-def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
+def dbCaseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, activedDbList: list[DB]):
     for db in activedDbList:
-        columns = st.columns(1 + CASE_CONFIG_SETTING_COLUMNS)
+        columns = st.columns(1 + DB_CASE_CONFIG_SETTING_COLUMNS)
         # column 0 - title
         dbColumn = columns[0]
         dbColumn.markdown(
@@ -64,52 +100,12 @@ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, active
             unsafe_allow_html=True,
         )
         k = 0
-        caseConfig = dbToCaseClusterConfigs[db][uiCaseItem]
-        for config in CASE_CONFIG_MAP.get(db, {}).get(uiCaseItem.caseLabel, []):
-            if config.isDisplayed(caseConfig):
-                column = columns[1 + k % CASE_CONFIG_SETTING_COLUMNS]
+        dbCaseConfig = dbToCaseClusterConfigs[db][uiCaseItem]
+        for config in get_case_config_inputs(db, uiCaseItem.caseLabel):
+            if config.isDisplayed(dbCaseConfig):
+                column = columns[1 + k % DB_CASE_CONFIG_SETTING_COLUMNS]
                 key = "%s-%s-%s" % (db, uiCaseItem.label, config.label.value)
-                if config.inputType == InputType.Text:
-                    caseConfig[config.label] = column.text_input(
-                        config.displayLabel if config.displayLabel else config.label.value,
-                        key=key,
-                        help=config.inputHelp,
-                        value=config.inputConfig["value"],
-                    )
-                elif config.inputType == InputType.Option:
-                    caseConfig[config.label] = column.selectbox(
-                        config.displayLabel if config.displayLabel else config.label.value,
-                        config.inputConfig["options"],
-                        key=key,
-                        help=config.inputHelp,
-                    )
-                elif config.inputType == InputType.Number:
-                    caseConfig[config.label] = column.number_input(
-                        config.displayLabel if config.displayLabel else config.label.value,
-                        # format="%d",
-                        step=config.inputConfig.get("step", 1),
-                        min_value=config.inputConfig["min"],
-                        max_value=config.inputConfig["max"],
-                        key=key,
-                        value=config.inputConfig["value"],
-                        help=config.inputHelp,
-                    )
-                elif config.inputType == InputType.Float:
-                    caseConfig[config.label] = column.number_input(
-                        config.displayLabel if config.displayLabel else config.label.value,
-                        step=config.inputConfig.get("step", 0.1),
-                        min_value=config.inputConfig["min"],
-                        max_value=config.inputConfig["max"],
-                        key=key,
-                        value=config.inputConfig["value"],
-                        help=config.inputHelp,
-                    )
-                elif config.inputType == InputType.Bool:
-                    caseConfig[config.label] = column.checkbox(
-                        config.displayLabel if config.displayLabel else config.label.value,
-                        value=config.inputConfig["value"],
-                        help=config.inputHelp,
-                    )
+                dbCaseConfig[config.label] = inputWidget(column, config, key)
                 k += 1
         if k == 0:
             columns[1].write("Auto")

vectordb_bench/frontend/components/run_test/dbSelector.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from streamlit.runtime.media_file_storage import MediaFileStorageError
 from vectordb_bench.frontend.config.styles import DB_SELECTOR_COLUMNS, DB_TO_ICON
 from vectordb_bench.frontend.config.dbCaseConfigs import DB_LIST
+import streamlit as st
-def dbSelector(st):
+def dbSelector(st: st):
     st.markdown(
         "<div style='height: 12px;'></div>",
         unsafe_allow_html=True,
@@ -20,11 +21,14 @@ def dbSelector(st):
     for i, db in enumerate(DB_LIST):
         column = dbContainerColumns[i % DB_SELECTOR_COLUMNS]
         dbIsActived[db] = column.checkbox(db.name)
-        try:
-            column.image(DB_TO_ICON.get(db, ""))
-        except MediaFileStorageError:
+        image_src = DB_TO_ICON.get(db, None)
+        if image_src:
+            column.markdown(
+                f'<img src="{image_src}" style="width:100px;height:100px;object-fit:contain;object-position:center;margin-bottom:10px;">',
+                unsafe_allow_html=True,
+            )
+        else:
             column.warning(f"{db.name} image not available")
-            pass
     activedDbList = [db for db in DB_LIST if dbIsActived[db]]
     return activedDbList

vectordb_bench/frontend/components/run_test/inputWidget.py ADDED Viewed

@@ -0,0 +1,48 @@
+from vectordb_bench.frontend.config.dbCaseConfigs import CaseConfigInput, InputType
+def inputWidget(st, config: CaseConfigInput, key: str):
+    if config.inputType == InputType.Text:
+        return st.text_input(
+            config.displayLabel if config.displayLabel else config.label.value,
+            key=key,
+            help=config.inputHelp,
+            value=config.inputConfig["value"],
+        )
+    if config.inputType == InputType.Option:
+        return st.selectbox(
+            config.displayLabel if config.displayLabel else config.label.value,
+            config.inputConfig["options"],
+            key=key,
+            help=config.inputHelp,
+        )
+    if config.inputType == InputType.Number:
+        return st.number_input(
+            config.displayLabel if config.displayLabel else config.label.value,
+            # format="%d",
+            step=config.inputConfig.get("step", 1),
+            min_value=config.inputConfig["min"],
+            max_value=config.inputConfig["max"],
+            key=key,
+            value=config.inputConfig["value"],
+            help=config.inputHelp,
+        )
+    if config.inputType == InputType.Float:
+        return st.number_input(
+            config.displayLabel if config.displayLabel else config.label.value,
+            step=config.inputConfig.get("step", 0.1),
+            min_value=config.inputConfig["min"],
+            max_value=config.inputConfig["max"],
+            key=key,
+            value=config.inputConfig["value"],
+            help=config.inputHelp,
+        )
+    if config.inputType == InputType.Bool:
+        return st.selectbox(
+            config.displayLabel if config.displayLabel else config.label.value,
+            options=[True, False],
+            index=0 if config.inputConfig["value"] else 1,
+            key=key,
+            help=config.inputHelp,
+        )
+    raise Exception(f"Invalid InputType: {config.inputType}")

vectordb_bench/frontend/components/run_test/submitTask.py CHANGED Viewed

@@ -86,7 +86,9 @@ def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
         currentTaskId = benchmark_runner.get_current_task_id()
         tasksCount = benchmark_runner.get_tasks_count()
         text = f":running: Running Task {currentTaskId} / {tasksCount}"
-        st.progress(currentTaskId / tasksCount, text=text)
+        if tasksCount > 0:
+            st.progress(currentTaskId / tasksCount, text=text)
         columns = st.columns(6)
         columns[0].button(

vectordb_bench/frontend/components/streaming/charts.py ADDED Viewed

@@ -0,0 +1,253 @@
+import plotly.graph_objects as go
+from vectordb_bench.frontend.components.streaming.data import (
+    DisplayedMetric,
+    StreamingData,
+    get_streaming_data,
+)
+from vectordb_bench.frontend.config.styles import (
+    COLORS_10,
+    COLORS_2,
+    SCATTER_LINE_WIDTH,
+    SCATTER_MAKER_SIZE,
+    STREAMING_CHART_COLUMNS,
+)
+def drawChartsByCase(
+    st,
+    allData,
+    showCaseNames: list[str],
+    **kwargs,
+):
+    allData = [d for d in allData if len(d["st_search_stage_list"]) > 0]
+    for case_name in showCaseNames:
+        data = [d for d in allData if d["case_name"] == case_name]
+        if len(data) == 0:
+            continue
+        container = st.container()
+        container.write("")  # blank line
+        container.subheader(case_name)
+        drawChartByMetric(container, data, case_name=case_name, **kwargs)
+        container.write("")  # blank line
+def drawChartByMetric(
+    st,
+    case_data,
+    case_name: str,
+    line_chart_displayed_y_metrics: list[tuple[DisplayedMetric, str]],
+    **kwargs,
+):
+    columns = st.columns(STREAMING_CHART_COLUMNS)
+    streaming_data = get_streaming_data(case_data)
+    # line chart
+    for i, metric_info in enumerate(line_chart_displayed_y_metrics):
+        metric, note = metric_info
+        container = columns[i % STREAMING_CHART_COLUMNS]
+        container.markdown(f"#### {metric.value.capitalize()}")
+        container.markdown(f"{note}")
+        key = f"{case_name}-{metric.value}"
+        drawLineChart(container, streaming_data, metric=metric, key=key, **kwargs)
+    # bar chart
+    container = columns[len(line_chart_displayed_y_metrics) % STREAMING_CHART_COLUMNS]
+    container.markdown("#### Duration")
+    container.markdown(
+        "insert more than ideal-insert-duration (dash-line) means exceeding the maximum processing capacity.",
+        help="vectordb need more time to process accumulated insert requests.",
+    )
+    key = f"{case_name}-duration"
+    drawBarChart(container, case_data, key=key, **kwargs)
+    # drawLineChart(container, data, line_x_displayed_label, label)
+    # drawTestChart(container)
+def drawLineChart(
+    st,
+    streaming_data: list[StreamingData],
+    metric: DisplayedMetric,
+    key: str,
+    with_last_optimized_data=True,
+    **kwargs,
+):
+    db_names = list({d.db_name for d in streaming_data})
+    db_names.sort()
+    x_metric = kwargs.get("line_chart_displayed_x_metric", DisplayedMetric.search_stage)
+    fig = go.Figure()
+    if x_metric == DisplayedMetric.search_time:
+        ideal_insert_duration = streaming_data[0].ideal_insert_duration
+        fig.add_shape(
+            type="line",
+            y0=min([getattr(d, metric.value) for d in streaming_data]),
+            y1=max([getattr(d, metric.value) for d in streaming_data]),
+            x0=ideal_insert_duration,
+            x1=ideal_insert_duration,
+            line=dict(color="#999", width=SCATTER_LINE_WIDTH, dash="dot"),
+            showlegend=True,
+            name="insert 100% standard time",
+        )
+    for i, db_name in enumerate(db_names):
+        data = [d for d in streaming_data if d.db_name == db_name]
+        color = COLORS_10[i]
+        if with_last_optimized_data:
+            fig.add_trace(
+                get_optimized_scatter(
+                    data,
+                    db_name=db_name,
+                    metric=metric,
+                    color=color,
+                    **kwargs,
+                )
+            )
+        fig.add_trace(
+            get_normal_scatter(
+                data,
+                db_name=db_name,
+                metric=metric,
+                color=color,
+                **kwargs,
+            )
+        )
+    fig.update_layout(
+        margin=dict(l=0, r=0, t=40, b=0, pad=8),
+        legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="left", x=0, title=""),
+    )
+    x_title = "Search Stages (%)"
+    if x_metric == DisplayedMetric.search_time:
+        x_title = "Actual Time (s)"
+    fig.update_layout(xaxis_title=x_title)
+    st.plotly_chart(fig, use_container_width=True, key=key)
+def get_normal_scatter(
+    data: list[StreamingData],
+    db_name: str,
+    metric: DisplayedMetric,
+    color: str,
+    line_chart_displayed_x_metric: DisplayedMetric,
+    **kwargs,
+):
+    unit = ""
+    if "latency" in metric.value:
+        unit = "ms"
+    data.sort(key=lambda x: getattr(x, line_chart_displayed_x_metric.value))
+    data = [d for d in data if not d.optimized]
+    hovertemplate = f"%{{text}}% data inserted.<br>{metric.value}=%{{y:.4g}}{unit}"
+    if line_chart_displayed_x_metric == DisplayedMetric.search_time:
+        hovertemplate = f"%{{text}}% data inserted.<br>actual_time=%{{x:.4g}}s<br>{metric.value}=%{{y:.4g}}{unit}"
+    return go.Scatter(
+        x=[getattr(d, line_chart_displayed_x_metric.value) for d in data],
+        y=[getattr(d, metric.value) for d in data],
+        text=[d.search_stage for d in data],
+        mode="markers+lines",
+        name=db_name,
+        marker=dict(color=color, size=SCATTER_MAKER_SIZE),
+        line=dict(dash="solid", width=SCATTER_LINE_WIDTH, color=color),
+        legendgroup=db_name,
+        hovertemplate=hovertemplate,
+    )
+def get_optimized_scatter(
+    data: list[StreamingData],
+    db_name: str,
+    metric: DisplayedMetric,
+    color: str,
+    line_chart_displayed_x_metric: DisplayedMetric,
+    **kwargs,
+):
+    unit = ""
+    if "latency" in metric.value:
+        unit = "ms"
+    data.sort(key=lambda x: x.search_stage)
+    if not data[-1].optimized or len(data) < 2:
+        return go.Scatter()
+    data = data[-2:]
+    hovertemplate = f"all data inserted and <b style='color: #333;'>optimized</b>.<br>{metric.value}=%{{y:.4g}}{unit}"
+    if line_chart_displayed_x_metric == DisplayedMetric.search_time:
+        hovertemplate = f"all data inserted and <b style='color: #333;'>optimized</b>.<br>actual_time=%{{x:.4g}}s<br>{metric.value}=%{{y:.4g}}{unit}"
+    return go.Scatter(
+        x=[getattr(d, line_chart_displayed_x_metric.value) for d in data],
+        y=[getattr(d, metric.value) for d in data],
+        text=[d.search_stage for d in data],
+        mode="markers+lines",
+        name=db_name,
+        legendgroup=db_name,
+        marker=dict(color=color, size=[0, SCATTER_MAKER_SIZE]),
+        line=dict(dash="dash", width=SCATTER_LINE_WIDTH, color=color),
+        hovertemplate=hovertemplate,
+        showlegend=False,
+    )
+def drawBarChart(
+    st,
+    data,
+    key: str,
+    with_last_optimized_data=True,
+    **kwargs,
+):
+    if len(data) < 1:
+        return
+    fig = go.Figure()
+    # ideal insert duration
+    ideal_insert_duration = data[0]["st_ideal_insert_duration"]
+    fig.add_shape(
+        type="line",
+        y0=-0.5,
+        y1=len(data) - 0.5,
+        x0=ideal_insert_duration,
+        x1=ideal_insert_duration,
+        line=dict(color="#999", width=SCATTER_LINE_WIDTH, dash="dot"),
+        showlegend=True,
+        name="insert 100% standard time",
+    )
+    # insert duration
+    fig.add_trace(
+        get_bar(
+            data,
+            metric=DisplayedMetric.insert_duration,
+            color=COLORS_2[0],
+            **kwargs,
+        )
+    )
+    # optimized duration
+    if with_last_optimized_data:
+        fig.add_trace(
+            get_bar(
+                data,
+                metric=DisplayedMetric.optimize_duration,
+                color=COLORS_2[1],
+                **kwargs,
+            )
+        )
+    fig.update_layout(
+        margin=dict(l=0, r=0, t=40, b=0, pad=8),
+        legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="left", x=0, title=""),
+    )
+    fig.update_layout(xaxis_title="time (s)")
+    fig.update_layout(barmode="stack")
+    fig.update_traces(width=0.15)
+    st.plotly_chart(fig, use_container_width=True, key=key)
+def get_bar(
+    data: list[StreamingData],
+    metric: DisplayedMetric,
+    color: str,
+    **kwargs,
+):
+    return go.Bar(
+        x=[d[metric.value] for d in data],
+        y=[d["db_name"] for d in data],
+        name=metric,
+        marker_color=color,
+        orientation="h",
+        hovertemplate="%{y} %{x:.2f}s",
+    )

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl