PyPI - vectordb-bench - Versions diffs - 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl - Mend

vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

vectordb_bench/__init__.py +49 -24
vectordb_bench/__main__.py +4 -3
vectordb_bench/backend/assembler.py +12 -13
vectordb_bench/backend/cases.py +55 -45
vectordb_bench/backend/clients/__init__.py +85 -14
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
vectordb_bench/backend/clients/alloydb/cli.py +51 -34
vectordb_bench/backend/clients/alloydb/config.py +30 -30
vectordb_bench/backend/clients/api.py +13 -24
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
vectordb_bench/backend/clients/chroma/chroma.py +39 -40
vectordb_bench/backend/clients/chroma/config.py +4 -2
vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
vectordb_bench/backend/clients/memorydb/cli.py +8 -8
vectordb_bench/backend/clients/memorydb/config.py +2 -2
vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
vectordb_bench/backend/clients/milvus/cli.py +41 -83
vectordb_bench/backend/clients/milvus/config.py +18 -8
vectordb_bench/backend/clients/milvus/milvus.py +19 -39
vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
vectordb_bench/backend/clients/pgvector/cli.py +40 -31
vectordb_bench/backend/clients/pgvector/config.py +63 -73
vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
vectordb_bench/backend/clients/pinecone/config.py +1 -0
vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
vectordb_bench/backend/clients/redis/cli.py +6 -12
vectordb_bench/backend/clients/redis/config.py +7 -5
vectordb_bench/backend/clients/redis/redis.py +95 -62
vectordb_bench/backend/clients/test/cli.py +2 -3
vectordb_bench/backend/clients/test/config.py +2 -2
vectordb_bench/backend/clients/test/test.py +5 -9
vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
vectordb_bench/backend/data_source.py +18 -14
vectordb_bench/backend/dataset.py +47 -27
vectordb_bench/backend/result_collector.py +2 -3
vectordb_bench/backend/runner/__init__.py +4 -6
vectordb_bench/backend/runner/mp_runner.py +56 -23
vectordb_bench/backend/runner/rate_runner.py +30 -19
vectordb_bench/backend/runner/read_write_runner.py +46 -22
vectordb_bench/backend/runner/serial_runner.py +81 -46
vectordb_bench/backend/runner/util.py +4 -3
vectordb_bench/backend/task_runner.py +92 -92
vectordb_bench/backend/utils.py +17 -10
vectordb_bench/base.py +0 -1
vectordb_bench/cli/cli.py +65 -60
vectordb_bench/cli/vectordbbench.py +6 -7
vectordb_bench/frontend/components/check_results/charts.py +8 -19
vectordb_bench/frontend/components/check_results/data.py +4 -16
vectordb_bench/frontend/components/check_results/filters.py +8 -16
vectordb_bench/frontend/components/check_results/nav.py +4 -4
vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
vectordb_bench/frontend/components/concurrent/charts.py +12 -12
vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
vectordb_bench/frontend/components/custom/initStyle.py +1 -1
vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
vectordb_bench/frontend/components/tables/data.py +3 -6
vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
vectordb_bench/frontend/pages/concurrent.py +3 -5
vectordb_bench/frontend/pages/custom.py +30 -9
vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
vectordb_bench/frontend/pages/run_test.py +3 -7
vectordb_bench/frontend/utils.py +1 -1
vectordb_bench/frontend/vdb_benchmark.py +4 -6
vectordb_bench/interface.py +45 -24
vectordb_bench/log_util.py +59 -64
vectordb_bench/metric.py +10 -11
vectordb_bench/models.py +26 -43
{vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
vectordb_bench-0.0.21.dist-info/RECORD +135 -0
{vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
vectordb_bench-0.0.19.dist-info/RECORD +0 -135
{vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
{vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
{vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0

vectordb_bench/cli/cli.py CHANGED Viewed

@@ -1,27 +1,27 @@
 import logging
+import os
 import time
+from collections.abc import Callable
 from concurrent.futures import wait
 from datetime import datetime
 from pprint import pformat
 from typing import (
     Annotated,
-    Callable,
-    List,
-    Optional,
-    Type,
+    Any,
     TypedDict,
     Unpack,
     get_origin,
     get_type_hints,
-    Dict,
-    Any,
 )
 import click
+from yaml import load
 from vectordb_bench.backend.clients.api import MetricType
 from .. import config
 from ..backend.clients import DB
-from ..interface import benchMarkRunner, global_result_future
+from ..interface import benchmark_runner, global_result_future
 from ..models import (
     CaseConfig,
     CaseType,
@@ -31,8 +31,7 @@ from ..models import (
     TaskConfig,
     TaskStage,
 )
-import os
-from yaml import load
 try:
     from yaml import CLoader as Loader
 except ImportError:
@@ -46,8 +45,8 @@ def click_get_defaults_from_file(ctx, param, value):
         else:
             input_file = os.path.join(config.CONFIG_LOCAL_DIR, value)
         try:
-            with open(input_file, 'r') as f:
-                _config: Dict[str, Dict[str, Any]] = load(f.read(), Loader=Loader)
+            with open(input_file) as f:
+                _config: dict[str, dict[str, Any]] = load(f.read(), Loader=Loader)
                 ctx.default_map = _config.get(ctx.command.name, {})
         except Exception as e:
             raise click.BadParameter(f"Failed to load config file: {e}")
@@ -55,7 +54,7 @@ def click_get_defaults_from_file(ctx, param, value):
 def click_parameter_decorators_from_typed_dict(
-    typed_dict: Type,
+    typed_dict: type,
 ) -> Callable[[click.decorators.FC], click.decorators.FC]:
     """A convenience method decorator that will read in a TypedDict with parameters defined by Annotated types.
     from .models import CaseConfig, CaseType, DBCaseConfig, DBConfig, TaskConfig, TaskStage
@@ -91,15 +90,12 @@ def click_parameter_decorators_from_typed_dict(
     decorators = []
     for _, t in get_type_hints(typed_dict, include_extras=True).items():
         assert get_origin(t) is Annotated
-        if (
-            len(t.__metadata__) == 1
-            and t.__metadata__[0].__module__ == "click.decorators"
-        ):
+        if len(t.__metadata__) == 1 and t.__metadata__[0].__module__ == "click.decorators":
             # happy path -- only accept Annotated[..., Union[click.option,click.argument,...]] with no additional metadata defined (len=1)
             decorators.append(t.__metadata__[0])
         else:
             raise RuntimeError(
-                "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring"
+                "Click-TypedDict decorator parsing must only contain root type and a click decorator like click.option. See docstring",
             )
     def deco(f):
@@ -132,11 +128,11 @@ def parse_task_stages(
     load: bool,
     search_serial: bool,
     search_concurrent: bool,
-) -> List[TaskStage]:
+) -> list[TaskStage]:
     stages = []
     if load and not drop_old:
         raise RuntimeError("Dropping old data cannot be skipped if loading data")
-    elif drop_old and not load:
+    if drop_old and not load:
         raise RuntimeError("Load cannot be skipped if dropping old data")
     if drop_old:
         stages.append(TaskStage.DROP_OLD)
@@ -149,12 +145,19 @@ def parse_task_stages(
     return stages
-def check_custom_case_parameters(ctx, param, value):
-    if ctx.params.get("case_type") == "PerformanceCustomDataset":
-        if value is None:
-            raise click.BadParameter("Custom case parameters\
-                                     \n--custom-case-name\n--custom-dataset-name\n--custom-dataset-dir\n--custom-dataset-size \
-                                     \n--custom-dataset-dim\n--custom-dataset-file-count\n are required")
+# ruff: noqa
+def check_custom_case_parameters(ctx: any, param: any, value: any):
+    if ctx.params.get("case_type") == "PerformanceCustomDataset" and value is None:
+        raise click.BadParameter(
+            """ Custom case parameters
+--custom-case-name
+--custom-dataset-name
+--custom-dataset-dir
+--custom-dataset-sizes
+--custom-dataset-dim
+--custom-dataset-file-count
+are required """,
+        )
     return value
@@ -175,7 +178,7 @@ def get_custom_case_config(parameters: dict) -> dict:
                 "file_count": parameters["custom_dataset_file_count"],
                 "use_shuffled": parameters["custom_dataset_use_shuffled"],
                 "with_gt": parameters["custom_dataset_with_gt"],
-            }
+            },
         }
     return custom_case_config
@@ -186,12 +189,14 @@ log = logging.getLogger(__name__)
 class CommonTypedDict(TypedDict):
     config_file: Annotated[
         bool,
-        click.option('--config-file',
-                     type=click.Path(),
-                     callback=click_get_defaults_from_file,
-                     is_eager=True,
-                     expose_value=False,
-                     help='Read configuration from yaml file'),
+        click.option(
+            "--config-file",
+            type=click.Path(),
+            callback=click_get_defaults_from_file,
+            is_eager=True,
+            expose_value=False,
+            help="Read configuration from yaml file",
+        ),
     ]
     drop_old: Annotated[
         bool,
@@ -246,9 +251,11 @@ class CommonTypedDict(TypedDict):
     db_label: Annotated[
         str,
         click.option(
-            "--db-label", type=str, help="Db label, default: date in ISO format",
+            "--db-label",
+            type=str,
+            help="Db label, default: date in ISO format",
             show_default=True,
-            default=datetime.now().isoformat()
+            default=datetime.now().isoformat(),
         ),
     ]
     dry_run: Annotated[
@@ -282,7 +289,7 @@ class CommonTypedDict(TypedDict):
         ),
     ]
     num_concurrency: Annotated[
-        List[str],
+        list[str],
         click.option(
             "--num-concurrency",
             type=str,
@@ -298,7 +305,7 @@ class CommonTypedDict(TypedDict):
             "--custom-case-name",
             help="Custom dataset case name",
             callback=check_custom_case_parameters,
-        )
+        ),
     ]
     custom_case_description: Annotated[
         str,
@@ -307,7 +314,7 @@ class CommonTypedDict(TypedDict):
             help="Custom dataset case description",
             default="This is a customized dataset.",
             show_default=True,
-        )
+        ),
     ]
     custom_case_load_timeout: Annotated[
         int,
@@ -316,7 +323,7 @@ class CommonTypedDict(TypedDict):
             help="Custom dataset case load timeout",
             default=36000,
             show_default=True,
-        )
+        ),
     ]
     custom_case_optimize_timeout: Annotated[
         int,
@@ -325,7 +332,7 @@ class CommonTypedDict(TypedDict):
             help="Custom dataset case optimize timeout",
             default=36000,
             show_default=True,
-        )
+        ),
     ]
     custom_dataset_name: Annotated[
         str,
@@ -397,60 +404,60 @@ class CommonTypedDict(TypedDict):
 class HNSWBaseTypedDict(TypedDict):
-    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m")]
+    m: Annotated[int | None, click.option("--m", type=int, help="hnsw m")]
     ef_construction: Annotated[
-        Optional[int],
+        int | None,
         click.option("--ef-construction", type=int, help="hnsw ef-construction"),
     ]
 class HNSWBaseRequiredTypedDict(TypedDict):
-    m: Annotated[Optional[int], click.option("--m", type=int, help="hnsw m", required=True)]
+    m: Annotated[int | None, click.option("--m", type=int, help="hnsw m", required=True)]
     ef_construction: Annotated[
-        Optional[int],
+        int | None,
         click.option("--ef-construction", type=int, help="hnsw ef-construction", required=True),
     ]
 class HNSWFlavor1(HNSWBaseTypedDict):
     ef_search: Annotated[
-        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search", is_eager=True)
+        int | None,
+        click.option("--ef-search", type=int, help="hnsw ef-search", is_eager=True),
     ]
 class HNSWFlavor2(HNSWBaseTypedDict):
     ef_runtime: Annotated[
-        Optional[int], click.option("--ef-runtime", type=int, help="hnsw ef-runtime")
+        int | None,
+        click.option("--ef-runtime", type=int, help="hnsw ef-runtime"),
     ]
 class HNSWFlavor3(HNSWBaseRequiredTypedDict):
     ef_search: Annotated[
-        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search", required=True)
+        int | None,
+        click.option("--ef-search", type=int, help="hnsw ef-search", required=True),
     ]
 class IVFFlatTypedDict(TypedDict):
-    lists: Annotated[
-        Optional[int], click.option("--lists", type=int, help="ivfflat lists")
-    ]
-    probes: Annotated[
-        Optional[int], click.option("--probes", type=int, help="ivfflat probes")
-    ]
+    lists: Annotated[int | None, click.option("--lists", type=int, help="ivfflat lists")]
+    probes: Annotated[int | None, click.option("--probes", type=int, help="ivfflat probes")]
 class IVFFlatTypedDictN(TypedDict):
     nlist: Annotated[
-        Optional[int], click.option("--lists", "nlist", type=int, help="ivfflat lists", required=True)
+        int | None,
+        click.option("--lists", "nlist", type=int, help="ivfflat lists", required=True),
     ]
     nprobe: Annotated[
-        Optional[int], click.option("--probes", "nprobe", type=int, help="ivfflat probes", required=True)
+        int | None,
+        click.option("--probes", "nprobe", type=int, help="ivfflat probes", required=True),
     ]
 @click.group()
-def cli():
-    ...
+def cli(): ...
 def run(
@@ -482,9 +489,7 @@ def run(
             custom_case=get_custom_case_config(parameters),
         ),
         stages=parse_task_stages(
-            (
-                False if not parameters["load"] else parameters["drop_old"]
-            ),  # only drop old data if loading new data
+            (False if not parameters["load"] else parameters["drop_old"]),  # only drop old data if loading new data
             parameters["load"],
             parameters["search_serial"],
             parameters["search_concurrent"],
@@ -493,7 +498,7 @@ def run(
     log.info(f"Task:\n{pformat(task)}\n")
     if not parameters["dry_run"]:
-        benchMarkRunner.run([task])
+        benchmark_runner.run([task])
         time.sleep(5)
         if global_result_future:
             wait([global_result_future])

vectordb_bench/cli/vectordbbench.py CHANGED Viewed

@@ -1,16 +1,15 @@
-from ..backend.clients.pgvector.cli import PgVectorHNSW
+from ..backend.clients.alloydb.cli import AlloyDBScaNN
+from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
+from ..backend.clients.memorydb.cli import MemoryDB
+from ..backend.clients.milvus.cli import MilvusAutoIndex
+from ..backend.clients.pgdiskann.cli import PgDiskAnn
 from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
+from ..backend.clients.pgvector.cli import PgVectorHNSW
 from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
-from ..backend.clients.pgdiskann.cli import PgDiskAnn
 from ..backend.clients.redis.cli import Redis
-from ..backend.clients.memorydb.cli import MemoryDB
 from ..backend.clients.test.cli import Test
 from ..backend.clients.weaviate_cloud.cli import Weaviate
 from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
-from ..backend.clients.milvus.cli import MilvusAutoIndex
-from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
-from ..backend.clients.alloydb.cli import AlloyDBScaNN
 from .cli import cli
 cli.add_command(PgVectorHNSW)

vectordb_bench/frontend/components/check_results/charts.py CHANGED Viewed

@@ -1,8 +1,7 @@
-from vectordb_bench.backend.cases import Case
 from vectordb_bench.frontend.components.check_results.expanderStyle import (
     initMainExpanderStyle,
 )
-from vectordb_bench.metric import metricOrder, isLowerIsBetterMetric, metricUnitMap
+from vectordb_bench.metric import metric_order, isLowerIsBetterMetric, metric_unit_map
 from vectordb_bench.frontend.config.styles import *
 from vectordb_bench.models import ResultLabel
 import plotly.express as px
@@ -21,9 +20,7 @@ def drawCharts(st, allData, failedTasks, caseNames: list[str]):
 def showFailedDBs(st, errorDBs):
     failedDBs = [db for db, label in errorDBs.items() if label == ResultLabel.FAILED]
-    timeoutDBs = [
-        db for db, label in errorDBs.items() if label == ResultLabel.OUTOFRANGE
-    ]
+    timeoutDBs = [db for db, label in errorDBs.items() if label == ResultLabel.OUTOFRANGE]
     showFailedText(st, "Failed", failedDBs)
     showFailedText(st, "Timeout", timeoutDBs)
@@ -41,7 +38,7 @@ def drawChart(data, st, key_prefix: str):
     metricsSet = set()
     for d in data:
         metricsSet = metricsSet.union(d["metricsSet"])
-    showMetrics = [metric for metric in metricOrder if metric in metricsSet]
+    showMetrics = [metric for metric in metric_order if metric in metricsSet]
     for i, metric in enumerate(showMetrics):
         container = st.container()
@@ -72,9 +69,7 @@ def getLabelToShapeMap(data):
             else:
                 usedShapes.add(labelIndexMap[label] % len(PATTERN_SHAPES))
-    labelToShapeMap = {
-        label: getPatternShape(index) for label, index in labelIndexMap.items()
-    }
+    labelToShapeMap = {label: getPatternShape(index) for label, index in labelIndexMap.items()}
     return labelToShapeMap
@@ -96,11 +91,9 @@ def drawMetricChart(data, metric, st, key: str):
     xpadding = (xmax - xmin) / 16
     xpadding_multiplier = 1.8
     xrange = [xmin, xmax + xpadding * xpadding_multiplier]
-    unit = metricUnitMap.get(metric, "")
+    unit = metric_unit_map.get(metric, "")
     labelToShapeMap = getLabelToShapeMap(dataWithMetric)
-    categoryorder = (
-        "total descending" if isLowerIsBetterMetric(metric) else "total ascending"
-    )
+    categoryorder = "total descending" if isLowerIsBetterMetric(metric) else "total ascending"
     fig = px.bar(
         dataWithMetric,
         x=metric,
@@ -137,18 +130,14 @@ def drawMetricChart(data, metric, st, key: str):
             color="#333",
             size=12,
         ),
-        marker=dict(
-            pattern=dict(fillmode="overlay", fgcolor="#fff", fgopacity=1, size=7)
-        ),
+        marker=dict(pattern=dict(fillmode="overlay", fgcolor="#fff", fgopacity=1, size=7)),
         texttemplate="%{x:,.4~r}" + unit,
     )
     fig.update_layout(
         margin=dict(l=0, r=0, t=48, b=12, pad=8),
         bargap=0.25,
         showlegend=False,
-        legend=dict(
-            orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""
-        ),
+        legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="right", x=1, title=""),
         # legend=dict(orientation="v", title=""),
         yaxis={"categoryorder": categoryorder},
         title=dict(

vectordb_bench/frontend/components/check_results/data.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from collections import defaultdict
 from dataclasses import asdict
-from vectordb_bench.backend.cases import Case
 from vectordb_bench.metric import isLowerIsBetterMetric
 from vectordb_bench.models import CaseResult, ResultLabel
@@ -24,10 +23,7 @@ def getFilterTasks(
         task
         for task in tasks
         if task.task_config.db_name in dbNames
-        and task.task_config.case_config.case_id.case_cls(
-            task.task_config.case_config.custom_case
-        ).name
-        in caseNames
+        and task.task_config.case_config.case_id.case_cls(task.task_config.case_config.custom_case).name in caseNames
     ]
     return filterTasks
@@ -39,9 +35,7 @@ def mergeTasks(tasks: list[CaseResult]):
         db = task.task_config.db.value
         db_label = task.task_config.db_config.db_label or ""
         version = task.task_config.db_config.version or ""
-        case = task.task_config.case_config.case_id.case_cls(
-            task.task_config.case_config.custom_case
-        )
+        case = task.task_config.case_config.case_id.case_cls(task.task_config.case_config.custom_case)
         dbCaseMetricsMap[db_name][case.name] = {
             "db": db,
             "db_label": db_label,
@@ -86,9 +80,7 @@ def mergeTasks(tasks: list[CaseResult]):
 def mergeMetrics(metrics_1: dict, metrics_2: dict) -> dict:
     metrics = {**metrics_1}
     for key, value in metrics_2.items():
-        metrics[key] = (
-            getBetterMetric(key, value, metrics[key]) if key in metrics else value
-        )
+        metrics[key] = getBetterMetric(key, value, metrics[key]) if key in metrics else value
     return metrics
@@ -99,11 +91,7 @@ def getBetterMetric(metric, value_1, value_2):
             return value_2
         if value_2 < 1e-7:
             return value_1
-        return (
-            min(value_1, value_2)
-            if isLowerIsBetterMetric(metric)
-            else max(value_1, value_2)
-        )
+        return min(value_1, value_2) if isLowerIsBetterMetric(metric) else max(value_1, value_2)
     except Exception:
         return value_1

vectordb_bench/frontend/components/check_results/filters.py CHANGED Viewed

@@ -20,23 +20,17 @@ def getshownData(results: list[TestResult], st):
     shownResults = getshownResults(results, st)
     showDBNames, showCaseNames = getShowDbsAndCases(shownResults, st)
-    shownData, failedTasks = getChartData(
-        shownResults, showDBNames, showCaseNames)
+    shownData, failedTasks = getChartData(shownResults, showDBNames, showCaseNames)
     return shownData, failedTasks, showCaseNames
 def getshownResults(results: list[TestResult], st) -> list[CaseResult]:
     resultSelectOptions = [
-        result.task_label
-        if result.task_label != result.run_id
-        else f"res-{result.run_id[:4]}"
-        for result in results
+        result.task_label if result.task_label != result.run_id else f"res-{result.run_id[:4]}" for result in results
     ]
     if len(resultSelectOptions) == 0:
-        st.write(
-            "There are no results to display. Please wait for the task to complete or run a new task."
-        )
+        st.write("There are no results to display. Please wait for the task to complete or run a new task.")
         return []
     selectedResultSelectedOptions = st.multiselect(
@@ -58,13 +52,12 @@ def getShowDbsAndCases(result: list[CaseResult], st) -> tuple[list[str], list[st
     allDbNames = list(set({res.task_config.db_name for res in result}))
     allDbNames.sort()
     allCases: list[Case] = [
-        res.task_config.case_config.case_id.case_cls(
-            res.task_config.case_config.custom_case)
-        for res in result
+        res.task_config.case_config.case_id.case_cls(res.task_config.case_config.custom_case) for res in result
     ]
     allCaseNameSet = set({case.name for case in allCases})
-    allCaseNames = [case_name for case_name in CASE_NAME_ORDER if case_name in allCaseNameSet] + \
-        [case_name for case_name in allCaseNameSet if case_name not in CASE_NAME_ORDER]
+    allCaseNames = [case_name for case_name in CASE_NAME_ORDER if case_name in allCaseNameSet] + [
+        case_name for case_name in allCaseNameSet if case_name not in CASE_NAME_ORDER
+    ]
     # DB Filter
     dbFilterContainer = st.container()
@@ -120,8 +113,7 @@ def filterView(container, header, options, col, optionLables=None):
     )
     if optionLables is None:
         optionLables = options
-    isActive = {option: st.session_state[selectAllState]
-                for option in optionLables}
+    isActive = {option: st.session_state[selectAllState] for option in optionLables}
     for i, option in enumerate(optionLables):
         isActive[option] = columns[i % col].checkbox(
             optionLables[i],

vectordb_bench/frontend/components/check_results/nav.py CHANGED Viewed

@@ -7,15 +7,15 @@ def NavToRunTest(st):
     navClick = st.button("Run Your Test &nbsp;&nbsp;>")
     if navClick:
         switch_page("run test")
 def NavToQuriesPerDollar(st):
     st.subheader("Compare qps with price.")
     navClick = st.button("QP$ (Quries per Dollar) &nbsp;&nbsp;>")
     if navClick:
         switch_page("quries_per_dollar")
 def NavToResults(st, key="nav-to-results"):
     navClick = st.button("< &nbsp;&nbsp;Back to Results", key=key)
     if navClick:

vectordb_bench/frontend/components/check_results/priceTable.py CHANGED Viewed

@@ -7,9 +7,7 @@ from vectordb_bench.frontend.config.dbPrices import DB_DBLABEL_TO_PRICE
 def priceTable(container, data):
-    dbAndLabelSet = {
-        (d["db"], d["db_label"]) for d in data if d["db"] != DB.Milvus.value
-    }
+    dbAndLabelSet = {(d["db"], d["db_label"]) for d in data if d["db"] != DB.Milvus.value}
     dbAndLabelList = list(dbAndLabelSet)
     dbAndLabelList.sort()

vectordb_bench/frontend/components/check_results/stPageConfig.py CHANGED Viewed

@@ -9,10 +9,11 @@ def initResultsPageConfig(st):
         # initial_sidebar_state="collapsed",
     )
 def initRunTestPageConfig(st):
     st.set_page_config(
         page_title=PAGE_TITLE,
         page_icon=FAVICON,
         # layout="wide",
         initial_sidebar_state="collapsed",
-    )
+    )

vectordb_bench/frontend/components/concurrent/charts.py CHANGED Viewed

@@ -14,24 +14,24 @@ def drawChartsByCase(allData, showCaseNames: list[str], st, latency_type: str):
         data = [
             {
                 "conc_num": caseData["conc_num_list"][i],
-                "qps": caseData["conc_qps_list"][i]
-                if 0 <= i < len(caseData["conc_qps_list"])
-                else 0,
-                "latency_p99": caseData["conc_latency_p99_list"][i] * 1000
-                if 0 <= i < len(caseData["conc_latency_p99_list"])
-                else 0,
-                "latency_avg": caseData["conc_latency_avg_list"][i] * 1000
-                if 0 <= i < len(caseData["conc_latency_avg_list"])
-                else 0,
+                "qps": (caseData["conc_qps_list"][i] if 0 <= i < len(caseData["conc_qps_list"]) else 0),
+                "latency_p99": (
+                    caseData["conc_latency_p99_list"][i] * 1000
+                    if 0 <= i < len(caseData["conc_latency_p99_list"])
+                    else 0
+                ),
+                "latency_avg": (
+                    caseData["conc_latency_avg_list"][i] * 1000
+                    if 0 <= i < len(caseData["conc_latency_avg_list"])
+                    else 0
+                ),
                 "db_name": caseData["db_name"],
                 "db": caseData["db"],
             }
             for caseData in caseDataList
             for i in range(len(caseData["conc_num_list"]))
         ]
-        drawChart(
-            data, chartContainer, key=f"{caseName}-qps-p99", x_metric=latency_type
-        )
+        drawChart(data, chartContainer, key=f"{caseName}-qps-p99", x_metric=latency_type)
 def getRange(metric, data, padding_multipliers):

vectordb_bench/frontend/components/custom/displayCustomCase.py CHANGED Viewed

@@ -1,4 +1,3 @@
 from vectordb_bench.frontend.components.custom.getCustomConfig import CustomCaseConfig
@@ -6,26 +5,33 @@ def displayCustomCase(customCase: CustomCaseConfig, st, key):
     columns = st.columns([1, 2])
     customCase.dataset_config.name = columns[0].text_input(
-        "Name", key=f"{key}_name", value=customCase.dataset_config.name)
+        "Name", key=f"{key}_name", value=customCase.dataset_config.name
+    )
     customCase.name = f"{customCase.dataset_config.name} (Performace Case)"
     customCase.dataset_config.dir = columns[1].text_input(
-        "Folder Path", key=f"{key}_dir", value=customCase.dataset_config.dir)
+        "Folder Path", key=f"{key}_dir", value=customCase.dataset_config.dir
+    )
     columns = st.columns(4)
     customCase.dataset_config.dim = columns[0].number_input(
-        "dim", key=f"{key}_dim", value=customCase.dataset_config.dim)
+        "dim", key=f"{key}_dim", value=customCase.dataset_config.dim
+    )
     customCase.dataset_config.size = columns[1].number_input(
-        "size", key=f"{key}_size", value=customCase.dataset_config.size)
+        "size", key=f"{key}_size", value=customCase.dataset_config.size
+    )
     customCase.dataset_config.metric_type = columns[2].selectbox(
-        "metric type", key=f"{key}_metric_type", options=["L2", "Cosine", "IP"])
+        "metric type", key=f"{key}_metric_type", options=["L2", "Cosine", "IP"]
+    )
     customCase.dataset_config.file_count = columns[3].number_input(
-        "train file count", key=f"{key}_file_count", value=customCase.dataset_config.file_count)
+        "train file count", key=f"{key}_file_count", value=customCase.dataset_config.file_count
+    )
     columns = st.columns(4)
     customCase.dataset_config.use_shuffled = columns[0].checkbox(
-        "use shuffled data",  key=f"{key}_use_shuffled", value=customCase.dataset_config.use_shuffled)
+        "use shuffled data", key=f"{key}_use_shuffled", value=customCase.dataset_config.use_shuffled
+    )
     customCase.dataset_config.with_gt = columns[1].checkbox(
-        "with groundtruth",  key=f"{key}_with_gt", value=customCase.dataset_config.with_gt)
+        "with groundtruth", key=f"{key}_with_gt", value=customCase.dataset_config.with_gt
+    )
-    customCase.description = st.text_area(
-        "description", key=f"{key}_description", value=customCase.description)
+    customCase.description = st.text_area("description", key=f"{key}_description", value=customCase.description)

vectordb_bench/frontend/components/custom/displaypPrams.py CHANGED Viewed

@@ -1,5 +1,6 @@
 def displayParams(st):
-    st.markdown("""
+    st.markdown(
+        """
 - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
   - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
   - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
@@ -8,4 +9,5 @@ def displayParams(st):
 - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
 - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
-""")
+"""
+    )

vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl