PyPI - vectordb-bench - Versions diffs - 0.0.30__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

vectordb-bench 0.0.30__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (76)

vectordb_bench/__init__.py +14 -27
vectordb_bench/backend/assembler.py +19 -6
vectordb_bench/backend/cases.py +186 -23
vectordb_bench/backend/clients/__init__.py +16 -0
vectordb_bench/backend/clients/api.py +22 -1
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +82 -41
vectordb_bench/backend/clients/aws_opensearch/config.py +23 -4
vectordb_bench/backend/clients/chroma/chroma.py +6 -2
vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
vectordb_bench/backend/clients/milvus/config.py +1 -0
vectordb_bench/backend/clients/milvus/milvus.py +74 -22
vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
vectordb_bench/backend/clients/oceanbase/config.py +125 -0
vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
vectordb_bench/backend/dataset.py +143 -27
vectordb_bench/backend/filter.py +76 -0
vectordb_bench/backend/runner/__init__.py +3 -3
vectordb_bench/backend/runner/mp_runner.py +52 -39
vectordb_bench/backend/runner/rate_runner.py +68 -52
vectordb_bench/backend/runner/read_write_runner.py +125 -68
vectordb_bench/backend/runner/serial_runner.py +56 -23
vectordb_bench/backend/task_runner.py +48 -20
vectordb_bench/cli/cli.py +59 -1
vectordb_bench/cli/vectordbbench.py +3 -0
vectordb_bench/frontend/components/check_results/data.py +16 -11
vectordb_bench/frontend/components/check_results/filters.py +53 -25
vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
vectordb_bench/frontend/components/check_results/nav.py +20 -0
vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
vectordb_bench/frontend/components/label_filter/charts.py +60 -0
vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
vectordb_bench/frontend/components/streaming/charts.py +253 -0
vectordb_bench/frontend/components/streaming/data.py +62 -0
vectordb_bench/frontend/components/tables/data.py +1 -1
vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
vectordb_bench/frontend/config/dbCaseConfigs.py +307 -40
vectordb_bench/frontend/config/styles.py +32 -2
vectordb_bench/frontend/pages/concurrent.py +5 -1
vectordb_bench/frontend/pages/custom.py +4 -0
vectordb_bench/frontend/pages/label_filter.py +56 -0
vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
vectordb_bench/frontend/pages/results.py +60 -0
vectordb_bench/frontend/pages/run_test.py +3 -3
vectordb_bench/frontend/pages/streaming.py +135 -0
vectordb_bench/frontend/pages/tables.py +4 -0
vectordb_bench/frontend/vdb_benchmark.py +16 -41
vectordb_bench/interface.py +6 -2
vectordb_bench/metric.py +15 -1
vectordb_bench/models.py +31 -11
vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
vectordb_bench/results/dbPrices.json +12 -4
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +85 -32
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +73 -56
vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +0 -0
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
{vectordb_bench-0.0.30.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0

vectordb_bench/backend/clients/milvus/milvus.py CHANGED Viewed

@@ -7,6 +7,8 @@ from contextlib import contextmanager
 from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, MilvusException, utility
+from vectordb_bench.backend.filter import Filter, FilterOp
 from ..api import VectorDB
 from .config import MilvusIndexConfig
@@ -16,6 +18,12 @@ MILVUS_LOAD_REQS_SIZE = 1.5 * 1024 * 1024
 class Milvus(VectorDB):
+    supported_filter_types: list[FilterOp] = [
+        FilterOp.NonFilter,
+        FilterOp.NumGE,
+        FilterOp.StrEqual,
+    ]
     def __init__(
         self,
         dim: int,
@@ -24,6 +32,7 @@ class Milvus(VectorDB):
         collection_name: str = "VectorDBBenchCollection",
         drop_old: bool = False,
         name: str = "Milvus",
+        with_scalar_labels: bool = False,
         **kwargs,
     ):
         """Initialize wrapper around the milvus vector database."""
@@ -32,11 +41,15 @@ class Milvus(VectorDB):
         self.case_config = db_case_config
         self.collection_name = collection_name
         self.batch_size = int(MILVUS_LOAD_REQS_SIZE / (dim * 4))
+        self.with_scalar_labels = with_scalar_labels
         self._primary_field = "pk"
-        self._scalar_field = "id"
+        self._scalar_id_field = "id"
+        self._scalar_label_field = "label"
         self._vector_field = "vector"
-        self._index_name = "vector_idx"
+        self._vector_index_name = "vector_idx"
+        self._scalar_id_index_name = "id_sort_idx"
+        self._scalar_labels_index_name = "labels_idx"
         from pymilvus import connections
@@ -53,9 +66,20 @@ class Milvus(VectorDB):
         if not utility.has_collection(self.collection_name):
             fields = [
                 FieldSchema(self._primary_field, DataType.INT64, is_primary=True),
-                FieldSchema(self._scalar_field, DataType.INT64),
+                FieldSchema(self._scalar_id_field, DataType.INT64),
                 FieldSchema(self._vector_field, DataType.FLOAT_VECTOR, dim=dim),
             ]
+            if self.with_scalar_labels:
+                is_partition_key = db_case_config.use_partition_key
+                log.info(f"with_scalar_labels, add a new varchar field, as partition_key: {is_partition_key}")
+                fields.append(
+                    FieldSchema(
+                        self._scalar_label_field,
+                        DataType.VARCHAR,
+                        max_length=256,
+                        is_partition_key=is_partition_key,
+                    )
+                )
             log.info(f"{self.name} create collection: {self.collection_name}")
@@ -67,16 +91,37 @@ class Milvus(VectorDB):
                 num_shards=self.db_config.get("num_shards"),
             )
-            log.info(f"{self.name} create index: index_params: {self.case_config.index_param()}")
-            col.create_index(
-                self._vector_field,
-                self.case_config.index_param(),
-                index_name=self._index_name,
-            )
+            self.create_index()
             col.load()
         connections.disconnect("default")
+    def create_index(self):
+        col = Collection(self.collection_name)
+        # vector index
+        col.create_index(
+            self._vector_field,
+            self.case_config.index_param(),
+            index_name=self._vector_index_name,
+        )
+        # scalar index for range-expr (int-filter)
+        col.create_index(
+            self._scalar_id_field,
+            index_params={
+                "index_type": "STL_SORT",
+            },
+            index_name=self._scalar_id_index_name,
+        )
+        # scalar index for varchar (label-filter)
+        if self.with_scalar_labels:
+            col.create_index(
+                self._scalar_label_field,
+                index_params={
+                    "index_type": "BITMAP",
+                },
+                index_name=self._scalar_labels_index_name,
+            )
     @contextmanager
     def init(self):
         """
@@ -109,17 +154,13 @@ class Milvus(VectorDB):
         try:
             self.col.flush()
             # wait for index done and load refresh
-            self.col.create_index(
-                self._vector_field,
-                self.case_config.index_param(),
-                index_name=self._index_name,
-            )
+            self.create_index()
-            utility.wait_for_index_building_complete(self.collection_name)
+            utility.wait_for_index_building_complete(self.collection_name, index_name=self._vector_index_name)
             def wait_index():
                 while True:
-                    progress = utility.index_building_progress(self.collection_name)
+                    progress = utility.index_building_progress(self.collection_name, index_name=self._vector_index_name)
                     if progress.get("pending_index_rows", -1) == 0:
                         break
                     time.sleep(5)
@@ -162,6 +203,7 @@ class Milvus(VectorDB):
         self,
         embeddings: Iterable[list[float]],
         metadata: list[int],
+        labels_data: list[str] | None = None,
         **kwargs,
     ) -> tuple[int, Exception]:
         """Insert embeddings into Milvus. should call self.init() first"""
@@ -177,32 +219,42 @@ class Milvus(VectorDB):
                     metadata[batch_start_offset:batch_end_offset],
                     embeddings[batch_start_offset:batch_end_offset],
                 ]
+                if self.with_scalar_labels:
+                    insert_data.append(labels_data[batch_start_offset:batch_end_offset])
                 res = self.col.insert(insert_data)
                 insert_count += len(res.primary_keys)
         except MilvusException as e:
             log.info(f"Failed to insert data: {e}")
-            return (insert_count, e)
-        return (insert_count, None)
+            return insert_count, e
+        return insert_count, None
+    def prepare_filter(self, filters: Filter):
+        if filters.type == FilterOp.NonFilter:
+            self.expr = ""
+        elif filters.type == FilterOp.NumGE:
+            self.expr = f"{self._scalar_id_field} >= {filters.int_value}"
+        elif filters.type == FilterOp.StrEqual:
+            self.expr = f"{self._scalar_label_field} == '{filters.label_value}'"
+        else:
+            msg = f"Not support Filter for Milvus - {filters}"
+            raise ValueError(msg)
     def search_embedding(
         self,
         query: list[float],
         k: int = 100,
-        filters: dict | None = None,
         timeout: int | None = None,
     ) -> list[int]:
         """Perform a search on a query embedding and return results."""
         assert self.col is not None
-        expr = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
         # Perform the search.
         res = self.col.search(
             data=[query],
             anns_field=self._vector_field,
             param=self.case_config.search_param(),
             limit=k,
-            expr=expr,
+            expr=self.expr,
         )
         # Organize results.

vectordb_bench/backend/clients/oceanbase/cli.py ADDED Viewed

@@ -0,0 +1,100 @@
+import os
+from typing import Annotated, Unpack
+import click
+from pydantic import SecretStr
+from vectordb_bench.backend.clients import DB
+from vectordb_bench.cli.cli import (
+    CommonTypedDict,
+    HNSWFlavor4,
+    OceanBaseIVFTypedDict,
+    cli,
+    click_parameter_decorators_from_typed_dict,
+    run,
+)
+from ..api import IndexType
+class OceanBaseTypedDict(CommonTypedDict):
+    host: Annotated[str, click.option("--host", type=str, help="OceanBase host", default="")]
+    user: Annotated[str, click.option("--user", type=str, help="OceanBase username", required=True)]
+    password: Annotated[
+        str,
+        click.option(
+            "--password",
+            type=str,
+            help="OceanBase database password",
+            default=lambda: os.environ.get("OB_PASSWORD", ""),
+        ),
+    ]
+    database: Annotated[str, click.option("--database", type=str, help="DataBase name", required=True)]
+    port: Annotated[int, click.option("--port", type=int, help="OceanBase port", required=True)]
+class OceanBaseHNSWTypedDict(CommonTypedDict, OceanBaseTypedDict, HNSWFlavor4): ...
+@cli.command()
+@click_parameter_decorators_from_typed_dict(OceanBaseHNSWTypedDict)
+def OceanBaseHNSW(**parameters: Unpack[OceanBaseHNSWTypedDict]):
+    from .config import OceanBaseConfig, OceanBaseHNSWConfig
+    run(
+        db=DB.OceanBase,
+        db_config=OceanBaseConfig(
+            db_label=parameters["db_label"],
+            user=SecretStr(parameters["user"]),
+            password=SecretStr(parameters["password"]),
+            host=parameters["host"],
+            port=parameters["port"],
+            database=parameters["database"],
+        ),
+        db_case_config=OceanBaseHNSWConfig(
+            m=parameters["m"],
+            efConstruction=parameters["ef_construction"],
+            ef_search=parameters["ef_search"],
+            index=parameters["index_type"],
+        ),
+        **parameters,
+    )
+class OceanBaseIVFTypedDict(CommonTypedDict, OceanBaseTypedDict, OceanBaseIVFTypedDict): ...
+@cli.command()
+@click_parameter_decorators_from_typed_dict(OceanBaseIVFTypedDict)
+def OceanBaseIVF(**parameters: Unpack[OceanBaseIVFTypedDict]):
+    from .config import OceanBaseConfig, OceanBaseIVFConfig
+    type_str = parameters["index_type"]
+    if type_str == "IVF_FLAT":
+        input_index_type = IndexType.IVFFlat
+    elif type_str == "IVF_PQ":
+        input_index_type = IndexType.IVFPQ
+    elif type_str == "IVF_SQ8":
+        input_index_type = IndexType.IVFSQ8
+    input_m = 0 if parameters["m"] is None else parameters["m"]
+    run(
+        db=DB.OceanBase,
+        db_config=OceanBaseConfig(
+            db_label=parameters["db_label"],
+            user=SecretStr(parameters["user"]),
+            password=SecretStr(parameters["password"]),
+            host=parameters["host"],
+            port=parameters["port"],
+            database=parameters["database"],
+        ),
+        db_case_config=OceanBaseIVFConfig(
+            m=input_m,
+            nlist=parameters["nlist"],
+            sample_per_nlist=parameters["sample_per_nlist"],
+            index=input_index_type,
+            ivf_nprobes=parameters["ivf_nprobes"],
+        ),
+        **parameters,
+    )

vectordb_bench/backend/clients/oceanbase/config.py ADDED Viewed

@@ -0,0 +1,125 @@
+from typing import TypedDict
+from pydantic import BaseModel, SecretStr, validator
+from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
+class OceanBaseConfigDict(TypedDict):
+    user: str
+    host: str
+    port: str
+    password: str
+    database: str
+class OceanBaseConfig(DBConfig):
+    user: SecretStr = SecretStr("root@perf")
+    password: SecretStr
+    host: str
+    port: int
+    database: str
+    def to_dict(self) -> OceanBaseConfigDict:
+        user_str = self.user.get_secret_value()
+        pwd_str = self.password.get_secret_value()
+        return {
+            "user": user_str,
+            "host": self.host,
+            "port": self.port,
+            "password": pwd_str,
+            "database": self.database,
+        }
+    @validator("*")
+    def not_empty_field(cls, v: any, field: any):
+        if field.name in ["password", "host", "db_label"]:
+            return v
+        if isinstance(v, str | SecretStr) and len(v) == 0:
+            raise ValueError("Empty string!")
+        return v
+class OceanBaseIndexConfig(BaseModel):
+    index: IndexType
+    metric_type: MetricType | None = None
+    lib: str = "vsag"
+    def parse_metric(self) -> str:
+        if self.metric_type == MetricType.L2 or (
+            self.index == IndexType.HNSW_BQ and self.metric_type == MetricType.COSINE
+        ):
+            return "l2"
+        if self.metric_type == MetricType.IP:
+            return "inner_product"
+        return "cosine"
+    def parse_metric_func_str(self) -> str:
+        if self.metric_type == MetricType.L2 or (
+            self.index == IndexType.HNSW_BQ and self.metric_type == MetricType.COSINE
+        ):
+            return "l2_distance"
+        if self.metric_type == MetricType.IP:
+            return "negative_inner_product"
+        return "cosine_distance"
+class OceanBaseHNSWConfig(OceanBaseIndexConfig, DBCaseConfig):
+    m: int
+    efConstruction: int
+    ef_search: int | None = None
+    index: IndexType
+    def index_param(self) -> dict:
+        return {
+            "lib": self.lib,
+            "metric_type": self.parse_metric(),
+            "index_type": self.index.value,
+            "params": {"m": self.m, "ef_construction": self.efConstruction},
+        }
+    def search_param(self) -> dict:
+        return {"metric_type": self.parse_metric_func_str(), "params": {"ef_search": self.ef_search}}
+class OceanBaseIVFConfig(OceanBaseIndexConfig, DBCaseConfig):
+    m: int
+    sample_per_nlist: int
+    nlist: int
+    index: IndexType
+    ivf_nprobes: int | None = None
+    def index_param(self) -> dict:
+        if self.index == IndexType.IVFPQ:
+            return {
+                "lib": "OB",
+                "metric_type": self.parse_metric(),
+                "index_type": self.index.value,
+                "params": {
+                    "m": self.M,
+                    "sample_per_nlist": self.sample_per_nlist,
+                    "nlist": self.nlist,
+                },
+            }
+        return {
+            "lib": "OB",
+            "metric_type": self.parse_metric(),
+            "index_type": self.index.value,
+            "params": {
+                "sample_per_nlist": self.sample_per_nlist,
+                "nlist": self.nlist,
+            },
+        }
+    def search_param(self) -> dict:
+        return {"metric_type": self.metric_type, "params": {"ivf_nprobes": self.ivf_nprobes}}
+_oceanbase_case_config = {
+    IndexType.HNSW_SQ: OceanBaseHNSWConfig,
+    IndexType.HNSW: OceanBaseHNSWConfig,
+    IndexType.HNSW_BQ: OceanBaseHNSWConfig,
+    IndexType.IVFFlat: OceanBaseIVFConfig,
+    IndexType.IVFPQ: OceanBaseIVFConfig,
+    IndexType.IVFSQ8: OceanBaseIVFConfig,
+}

vectordb_bench/backend/clients/oceanbase/oceanbase.py ADDED Viewed

@@ -0,0 +1,215 @@
+import logging
+import struct
+import time
+from collections.abc import Generator
+from contextlib import contextmanager
+from typing import Any
+import mysql.connector as mysql
+from ..api import IndexType, VectorDB
+from .config import OceanBaseConfigDict, OceanBaseHNSWConfig
+log = logging.getLogger(__name__)
+OCEANBASE_DEFAULT_LOAD_BATCH_SIZE = 256
+class OceanBase(VectorDB):
+    def __init__(
+        self,
+        dim: int,
+        db_config: OceanBaseConfigDict,
+        db_case_config: OceanBaseHNSWConfig,
+        collection_name: str = "items",
+        drop_old: bool = False,
+        **kwargs,
+    ):
+        self.name = "OceanBase"
+        self.dim = dim
+        self.db_config = db_config
+        self.db_case_config = db_case_config
+        self.table_name = collection_name
+        self.load_batch_size = OCEANBASE_DEFAULT_LOAD_BATCH_SIZE
+        self._index_name = "vidx"
+        self._primary_field = "id"
+        self._vector_field = "embedding"
+        log.info(
+            f"{self.name} initialized with config:\nDatabase: {self.db_config}\nCase Config: {self.db_case_config}"
+        )
+        self._conn = None
+        self._cursor = None
+        try:
+            self._connect()
+            if drop_old:
+                self._drop_table()
+                self._create_table()
+        finally:
+            self._disconnect()
+    def _connect(self):
+        try:
+            self._conn = mysql.connect(
+                host=self.db_config["host"],
+                user=self.db_config["user"],
+                port=self.db_config["port"],
+                password=self.db_config["password"],
+                database=self.db_config["database"],
+            )
+            self._cursor = self._conn.cursor()
+        except mysql.Error:
+            log.exception("Failed to connect to the database")
+            raise
+    def _disconnect(self):
+        if self._cursor:
+            self._cursor.close()
+            self._cursor = None
+        if self._conn:
+            self._conn.close()
+            self._conn = None
+    @contextmanager
+    def init(self) -> Generator[None, None, None]:
+        try:
+            self._connect()
+            self._cursor.execute("SET autocommit=1")
+            if self.db_case_config.index in {IndexType.HNSW, IndexType.HNSW_SQ, IndexType.HNSW_BQ}:
+                self._cursor.execute(
+                    f"SET ob_hnsw_ef_search={(self.db_case_config.search_param())['params']['ef_search']}"
+                )
+            else:
+                self._cursor.execute(
+                    f"SET ob_ivf_nprobes={(self.db_case_config.search_param())['params']['ivf_nprobes']}"
+                )
+            yield
+        finally:
+            self._disconnect()
+    def _drop_table(self):
+        if not self._cursor:
+            raise ValueError("Cursor is not initialized")
+        log.info(f"Dropping table {self.table_name}")
+        self._cursor.execute(f"DROP TABLE IF EXISTS {self.table_name}")
+    def _create_table(self):
+        if not self._cursor:
+            raise ValueError("Cursor is not initialized")
+        log.info(f"Creating table {self.table_name}")
+        create_table_query = f"""
+        CREATE TABLE {self.table_name} (
+            id INT PRIMARY KEY,
+            embedding VECTOR({self.dim})
+        );
+        """
+        self._cursor.execute(create_table_query)
+    def optimize(self, data_size: int):
+        index_params = self.db_case_config.index_param()
+        index_args = ", ".join(f"{k}={v}" for k, v in index_params["params"].items())
+        index_query = (
+            f"CREATE /*+ PARALLEL(18) */ VECTOR INDEX idx1 "
+            f"ON {self.table_name}(embedding) "
+            f"WITH (distance={self.db_case_config.parse_metric()}, "
+            f"type={index_params['index_type']}, lib={index_params['lib']}, {index_args}"
+        )
+        if self.db_case_config.index in {IndexType.HNSW, IndexType.HNSW_SQ, IndexType.HNSW_BQ}:
+            index_query += ", extra_info_max_size=32"
+        index_query += ")"
+        log.info("Create index query: %s", index_query)
+        try:
+            log.info("Creating index...")
+            start_time = time.time()
+            self._cursor.execute(index_query)
+            log.info(f"Index created in {time.time() - start_time:.2f} seconds")
+            log.info("Performing major freeze...")
+            self._cursor.execute("ALTER SYSTEM MAJOR FREEZE;")
+            time.sleep(10)
+            self._wait_for_major_compaction()
+            log.info("Gathering schema statistics...")
+            self._cursor.execute("CALL dbms_stats.gather_schema_stats('test', degree => 96);")
+        except mysql.Error:
+            log.exception("Failed to optimize index")
+            raise
+    def need_normalize_cosine(self) -> bool:
+        if self.db_case_config.index == IndexType.HNSW_BQ:
+            log.info("current HNSW_BQ only supports L2, cosine dataset need normalize.")
+            return True
+        return False
+    def _wait_for_major_compaction(self):
+        while True:
+            self._cursor.execute(
+                "SELECT IF(COUNT(*) = COUNT(STATUS = 'IDLE' OR NULL), 'TRUE', 'FALSE') "
+                "AS all_status_idle FROM oceanbase.DBA_OB_ZONE_MAJOR_COMPACTION;"
+            )
+            all_status_idle = self._cursor.fetchone()[0]
+            if all_status_idle == "TRUE":
+                break
+            time.sleep(10)
+    def insert_embeddings(
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs: Any,
+    ) -> tuple[int, Exception | None]:
+        if not self._cursor:
+            raise ValueError("Cursor is not initialized")
+        insert_count = 0
+        try:
+            for batch_start in range(0, len(embeddings), self.load_batch_size):
+                batch_end = min(batch_start + self.load_batch_size, len(embeddings))
+                batch = [(metadata[i], embeddings[i]) for i in range(batch_start, batch_end)]
+                values = ", ".join(f"({item_id}, '[{','.join(map(str, embedding))}]')" for item_id, embedding in batch)
+                self._cursor.execute(
+                    f"INSERT /*+ ENABLE_PARALLEL_DML PARALLEL(32) */ INTO {self.table_name} VALUES {values}"  # noqa: S608
+                )
+                insert_count += len(batch)
+        except mysql.Error:
+            log.exception("Failed to insert embeddings")
+            raise
+        return insert_count, None
+    def search_embedding(
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict[str, Any] | None = None,
+        timeout: int | None = None,
+    ) -> list[int]:
+        if not self._cursor:
+            raise ValueError("Cursor is not initialized")
+        packed = struct.pack(f"<{len(query)}f", *query)
+        hex_vec = packed.hex()
+        filter_clause = f"WHERE id >= {filters['id']}" if filters else ""
+        query_str = (
+            f"SELECT id FROM {self.table_name} "  # noqa: S608
+            f"{filter_clause} ORDER BY "
+            f"{self.db_case_config.parse_metric_func_str()}(embedding, X'{hex_vec}') "
+            f"APPROXIMATE LIMIT {k}"
+        )
+        try:
+            self._cursor.execute(query_str)
+            return [row[0] for row in self._cursor.fetchall()]
+        except mysql.Error:
+            log.exception("Failed to execute search query")
+            raise

vectordb-bench 0.0.30__py3-none-any.whl → 1.0.0__py3-none-any.whl

vectordb-bench 0.0.30__py3-none-any.whl → 1.0.0__py3-none-any.whl