PyPI - vectordb-bench - Versions diffs - 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl - Mend

vectordb-bench 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

vectordb_bench/__init__.py +1 -0
vectordb_bench/backend/cases.py +45 -1
vectordb_bench/backend/clients/__init__.py +47 -0
vectordb_bench/backend/clients/api.py +2 -0
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +104 -40
vectordb_bench/backend/clients/aws_opensearch/cli.py +52 -15
vectordb_bench/backend/clients/aws_opensearch/config.py +27 -7
vectordb_bench/backend/clients/hologres/cli.py +50 -0
vectordb_bench/backend/clients/hologres/config.py +121 -0
vectordb_bench/backend/clients/hologres/hologres.py +365 -0
vectordb_bench/backend/clients/lancedb/lancedb.py +1 -0
vectordb_bench/backend/clients/milvus/cli.py +29 -9
vectordb_bench/backend/clients/milvus/config.py +2 -0
vectordb_bench/backend/clients/milvus/milvus.py +1 -1
vectordb_bench/backend/clients/oceanbase/cli.py +1 -0
vectordb_bench/backend/clients/oceanbase/config.py +3 -1
vectordb_bench/backend/clients/oceanbase/oceanbase.py +20 -4
vectordb_bench/backend/clients/oss_opensearch/cli.py +155 -0
vectordb_bench/backend/clients/oss_opensearch/config.py +157 -0
vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +582 -0
vectordb_bench/backend/clients/oss_opensearch/run.py +166 -0
vectordb_bench/backend/clients/pgdiskann/cli.py +45 -0
vectordb_bench/backend/clients/pgdiskann/config.py +16 -0
vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +94 -26
vectordb_bench/backend/clients/s3_vectors/config.py +41 -0
vectordb_bench/backend/clients/s3_vectors/s3_vectors.py +171 -0
vectordb_bench/backend/clients/tidb/cli.py +0 -4
vectordb_bench/backend/clients/tidb/config.py +22 -2
vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -1
vectordb_bench/backend/clients/zilliz_cloud/config.py +4 -1
vectordb_bench/backend/dataset.py +70 -0
vectordb_bench/backend/filter.py +17 -0
vectordb_bench/backend/runner/mp_runner.py +4 -0
vectordb_bench/backend/runner/rate_runner.py +23 -11
vectordb_bench/backend/runner/read_write_runner.py +10 -9
vectordb_bench/backend/runner/serial_runner.py +23 -7
vectordb_bench/backend/task_runner.py +5 -4
vectordb_bench/cli/cli.py +36 -0
vectordb_bench/cli/vectordbbench.py +4 -0
vectordb_bench/fig/custom_case_run_test.png +0 -0
vectordb_bench/fig/custom_dataset.png +0 -0
vectordb_bench/fig/homepage/bar-chart.png +0 -0
vectordb_bench/fig/homepage/concurrent.png +0 -0
vectordb_bench/fig/homepage/custom.png +0 -0
vectordb_bench/fig/homepage/label_filter.png +0 -0
vectordb_bench/fig/homepage/qp$.png +0 -0
vectordb_bench/fig/homepage/run_test.png +0 -0
vectordb_bench/fig/homepage/streaming.png +0 -0
vectordb_bench/fig/homepage/table.png +0 -0
vectordb_bench/fig/run_test_select_case.png +0 -0
vectordb_bench/fig/run_test_select_db.png +0 -0
vectordb_bench/fig/run_test_submit.png +0 -0
vectordb_bench/frontend/components/check_results/filters.py +1 -4
vectordb_bench/frontend/components/check_results/nav.py +2 -1
vectordb_bench/frontend/components/concurrent/charts.py +5 -0
vectordb_bench/frontend/components/int_filter/charts.py +60 -0
vectordb_bench/frontend/components/streaming/data.py +7 -0
vectordb_bench/frontend/components/welcome/welcomePrams.py +42 -4
vectordb_bench/frontend/config/dbCaseConfigs.py +142 -16
vectordb_bench/frontend/config/styles.py +4 -0
vectordb_bench/frontend/pages/concurrent.py +1 -1
vectordb_bench/frontend/pages/custom.py +1 -1
vectordb_bench/frontend/pages/int_filter.py +56 -0
vectordb_bench/frontend/pages/streaming.py +16 -3
vectordb_bench/interface.py +5 -1
vectordb_bench/metric.py +7 -0
vectordb_bench/models.py +39 -4
vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json +2509 -0
vectordb_bench/results/getLeaderboardDataV2.py +23 -2
vectordb_bench/results/leaderboard_v2.json +200 -0
vectordb_bench/results/leaderboard_v2_streaming.json +128 -0
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/METADATA +40 -8
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/RECORD +77 -51
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/WHEEL +0 -0
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/entry_points.txt +0 -0
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/licenses/LICENSE +0 -0
{vectordb_bench-1.0.4.dist-info → vectordb_bench-1.0.7.dist-info}/top_level.txt +0 -0

vectordb_bench/backend/clients/oceanbase/config.py CHANGED Viewed

@@ -85,6 +85,7 @@ class OceanBaseHNSWConfig(OceanBaseIndexConfig, DBCaseConfig):
 class OceanBaseIVFConfig(OceanBaseIndexConfig, DBCaseConfig):
     m: int
     sample_per_nlist: int
+    nbits: int | None = None
     nlist: int
     index: IndexType
     ivf_nprobes: int | None = None
@@ -96,8 +97,9 @@ class OceanBaseIVFConfig(OceanBaseIndexConfig, DBCaseConfig):
                 "metric_type": self.parse_metric(),
                 "index_type": self.index.value,
                 "params": {
-                    "m": self.M,
+                    "m": self.m,
                     "sample_per_nlist": self.sample_per_nlist,
+                    "nbits": self.nbits,
                     "nlist": self.nlist,
                 },
             }

vectordb_bench/backend/clients/oceanbase/oceanbase.py CHANGED Viewed

@@ -7,6 +7,8 @@ from typing import Any
 import mysql.connector as mysql
+from vectordb_bench.backend.filter import Filter, FilterOp
 from ..api import IndexType, VectorDB
 from .config import OceanBaseConfigDict, OceanBaseHNSWConfig
@@ -16,6 +18,12 @@ OCEANBASE_DEFAULT_LOAD_BATCH_SIZE = 256
 class OceanBase(VectorDB):
+    supported_filter_types: list[FilterOp] = [
+        FilterOp.NonFilter,
+        FilterOp.NumGE,
+        FilterOp.StrEqual,
+    ]
     def __init__(
         self,
         dim: int,
@@ -187,22 +195,30 @@ class OceanBase(VectorDB):
         return insert_count, None
+    def prepare_filter(self, filters: Filter):
+        if filters.type == FilterOp.NonFilter:
+            self.expr = ""
+        elif filters.type == FilterOp.NumGE:
+            self.expr = f"WHERE id >= {filters.int_value}"
+        elif filters.type == FilterOp.StrEqual:
+            self.expr = f"WHERE id == '{filters.label_value}'"
+        else:
+            msg = f"Not support Filter for Oceanbase - {filters}"
+            raise ValueError(msg)
     def search_embedding(
         self,
         query: list[float],
         k: int = 100,
-        filters: dict[str, Any] | None = None,
-        timeout: int | None = None,
     ) -> list[int]:
         if not self._cursor:
             raise ValueError("Cursor is not initialized")
         packed = struct.pack(f"<{len(query)}f", *query)
         hex_vec = packed.hex()
-        filter_clause = f"WHERE id >= {filters['id']}" if filters else ""
         query_str = (
             f"SELECT id FROM {self.table_name} "  # noqa: S608
-            f"{filter_clause} ORDER BY "
+            f"{self.expr} ORDER BY "
             f"{self.db_case_config.parse_metric_func_str()}(embedding, X'{hex_vec}') "
             f"APPROXIMATE LIMIT {k}"
         )

vectordb_bench/backend/clients/oss_opensearch/cli.py ADDED Viewed

@@ -0,0 +1,155 @@
+import logging
+from typing import Annotated, TypedDict, Unpack
+import click
+from pydantic import SecretStr
+from ....cli.cli import (
+    CommonTypedDict,
+    HNSWFlavor1,
+    cli,
+    click_parameter_decorators_from_typed_dict,
+    run,
+)
+from .. import DB
+from .config import OSSOpenSearchQuantization, OSSOS_Engine
+log = logging.getLogger(__name__)
+class OSSOpenSearchTypedDict(TypedDict):
+    host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
+    port: Annotated[int, click.option("--port", type=int, default=80, help="Db Port")]
+    user: Annotated[str, click.option("--user", type=str, help="Db User")]
+    password: Annotated[str, click.option("--password", type=str, help="Db password")]
+    number_of_shards: Annotated[
+        int,
+        click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
+    ]
+    number_of_replicas: Annotated[
+        int,
+        click.option(
+            "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
+        ),
+    ]
+    index_thread_qty: Annotated[
+        int,
+        click.option(
+            "--index-thread-qty",
+            type=int,
+            help="Thread count for native engine indexing",
+            default=4,
+        ),
+    ]
+    engine: Annotated[
+        str,
+        click.option(
+            "--engine",
+            type=click.Choice(["nmslib", "faiss", "lucene"], case_sensitive=False),
+            help="HNSW algorithm implementation to use",
+            default="faiss",
+        ),
+    ]
+    metric_type: Annotated[
+        str,
+        click.option(
+            "--metric-type",
+            type=click.Choice(["l2", "cosine", "ip"], case_sensitive=False),
+            help="Distance metric type for vector similarity",
+            default="l2",
+        ),
+    ]
+    number_of_segments: Annotated[
+        int,
+        click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
+    ]
+    refresh_interval: Annotated[
+        str,
+        click.option(
+            "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
+        ),
+    ]
+    force_merge_enabled: Annotated[
+        bool,
+        click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
+    ]
+    flush_threshold_size: Annotated[
+        str,
+        click.option(
+            "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
+        ),
+    ]
+    cb_threshold: Annotated[
+        str,
+        click.option(
+            "--cb-threshold",
+            type=str,
+            help="k-NN Memory circuit breaker threshold",
+            default="50%",
+        ),
+    ]
+    quantization_type: Annotated[
+        str | None,
+        click.option(
+            "--quantization-type",
+            type=click.Choice(["fp32", "fp16"]),
+            help="quantization type for vectors (in index)",
+            default="fp32",
+            required=False,
+        ),
+    ]
+    engine: Annotated[
+        str | None,
+        click.option(
+            "--engine",
+            type=click.Choice(["faiss", "lucene"]),
+            help="quantization type for vectors (in index)",
+            default="faiss",
+            required=False,
+        ),
+    ]
+class OSSOpenSearchHNSWTypedDict(CommonTypedDict, OSSOpenSearchTypedDict, HNSWFlavor1): ...
+@cli.command()
+@click_parameter_decorators_from_typed_dict(OSSOpenSearchHNSWTypedDict)
+def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]):
+    from .config import OSSOpenSearchConfig, OSSOpenSearchIndexConfig
+    run(
+        db=DB.OSSOpenSearch,
+        db_config=OSSOpenSearchConfig(
+            host=parameters["host"],
+            port=parameters["port"],
+            user=parameters["user"],
+            password=SecretStr(parameters["password"]),
+        ),
+        db_case_config=OSSOpenSearchIndexConfig(
+            number_of_shards=parameters["number_of_shards"],
+            number_of_replicas=parameters["number_of_replicas"],
+            index_thread_qty=parameters["index_thread_qty"],
+            number_of_segments=parameters["number_of_segments"],
+            refresh_interval=parameters["refresh_interval"],
+            force_merge_enabled=parameters["force_merge_enabled"],
+            flush_threshold_size=parameters["flush_threshold_size"],
+            index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
+            cb_threshold=parameters["cb_threshold"],
+            efConstruction=parameters["ef_construction"],
+            efSearch=parameters["ef_runtime"],
+            M=parameters["m"],
+            engine=OSSOS_Engine(parameters["engine"]),
+            quantization_type=OSSOpenSearchQuantization(parameters["quantization_type"]),
+        ),
+        **parameters,
+    )

vectordb_bench/backend/clients/oss_opensearch/config.py ADDED Viewed

@@ -0,0 +1,157 @@
+import logging
+from enum import Enum
+from pydantic import BaseModel, SecretStr, root_validator, validator
+from ..api import DBCaseConfig, DBConfig, MetricType
+log = logging.getLogger(__name__)
+class OSSOpenSearchConfig(DBConfig, BaseModel):
+    host: str = ""
+    port: int = 80
+    user: str | None = None
+    password: SecretStr | None = None
+    def to_dict(self) -> dict:
+        use_ssl = self.port == 443
+        http_auth = (
+            (self.user, self.password.get_secret_value())
+            if self.user is not None and self.password is not None and len(self.user) != 0 and len(self.password) != 0
+            else ()
+        )
+        return {
+            "hosts": [{"host": self.host, "port": self.port}],
+            "http_auth": http_auth,
+            "use_ssl": use_ssl,
+            "http_compress": True,
+            "verify_certs": use_ssl,
+            "ssl_assert_hostname": False,
+            "ssl_show_warn": False,
+            "timeout": 600,
+        }
+    @validator("*")
+    def not_empty_field(cls, v: any, field: any):
+        if (
+            field.name in cls.common_short_configs()
+            or field.name in cls.common_long_configs()
+            or field.name in ["user", "password", "host"]
+        ):
+            return v
+        if isinstance(v, str | SecretStr) and len(v) == 0:
+            raise ValueError("Empty string!")
+        return v
+class OSSOS_Engine(Enum):
+    faiss = "faiss"
+    lucene = "lucene"
+class OSSOpenSearchQuantization(Enum):
+    fp32 = "fp32"
+    fp16 = "fp16"
+class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
+    metric_type: MetricType = MetricType.L2
+    engine: OSSOS_Engine = OSSOS_Engine.faiss
+    efConstruction: int = 256
+    efSearch: int = 100
+    engine_name: str | None = None
+    metric_type_name: str | None = None
+    M: int = 16
+    index_thread_qty: int | None = 4
+    number_of_shards: int | None = 1
+    number_of_replicas: int | None = 0
+    number_of_segments: int | None = 1
+    refresh_interval: str | None = "60s"
+    force_merge_enabled: bool | None = True
+    flush_threshold_size: str | None = "5120mb"
+    index_thread_qty_during_force_merge: int = 8
+    cb_threshold: str | None = "50%"
+    number_of_indexing_clients: int | None = 1
+    use_routing: bool = False  # for label-filter cases
+    oversample_factor: float = 1.0
+    quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32
+    @root_validator
+    def validate_engine_name(cls, values: dict):
+        """Map engine_name string from UI to engine enum"""
+        if values.get("engine_name"):
+            engine_name = values["engine_name"].lower()
+            if engine_name == "faiss":
+                values["engine"] = OSSOS_Engine.faiss
+            elif engine_name == "lucene":
+                values["engine"] = OSSOS_Engine.lucene
+            else:
+                log.warning(f"Unknown engine_name: {engine_name}, defaulting to faiss")
+                values["engine"] = OSSOS_Engine.faiss
+        return values
+    def __eq__(self, obj: any):
+        return (
+            self.engine == obj.engine
+            and self.M == obj.M
+            and self.efConstruction == obj.efConstruction
+            and self.number_of_shards == obj.number_of_shards
+            and self.number_of_replicas == obj.number_of_replicas
+            and self.number_of_segments == obj.number_of_segments
+            and self.use_routing == obj.use_routing
+            and self.quantization_type == obj.quantization_type
+        )
+    def __hash__(self) -> int:
+        return hash(
+            (
+                self.engine,
+                self.M,
+                self.efConstruction,
+                self.number_of_shards,
+                self.number_of_replicas,
+                self.number_of_segments,
+                self.use_routing,
+                self.quantization_type,
+            )
+        )
+    def parse_metric(self) -> str:
+        log.info(f"User specified metric_type: {self.metric_type_name}")
+        self.metric_type = MetricType[self.metric_type_name.upper()]
+        if self.metric_type == MetricType.IP:
+            return "innerproduct"
+        if self.metric_type == MetricType.COSINE:
+            return "cosinesimil"
+        if self.metric_type == MetricType.L2:
+            log.info("Using l2 as specified by user")
+            return "l2"
+        return "l2"
+    @property
+    def use_quant(self) -> bool:
+        return self.quantization_type is not OSSOpenSearchQuantization.fp32
+    def index_param(self) -> dict:
+        log.info(f"Using engine: {self.engine} for index creation")
+        log.info(f"Using metric_type: {self.metric_type_name} for index creation")
+        log.info(f"Resulting space_type: {self.parse_metric()} for index creation")
+        return {
+            "name": "hnsw",
+            "engine": self.engine.value,
+            "space_type": self.parse_metric(),
+            "parameters": {
+                "ef_construction": self.efConstruction,
+                "m": self.M,
+                **(
+                    {"encoder": {"name": "sq", "parameters": {"type": self.quantization_type.value}}}
+                    if self.use_quant
+                    else {}
+                ),
+            },
+        }
+    def search_param(self) -> dict:
+        return {"ef_search": self.efSearch}

vectordb-bench 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl

vectordb-bench 1.0.4__py3-none-any.whl → 1.0.7__py3-none-any.whl