PyPI - vectordb-bench - Versions diffs - 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl - Mend

vectordb-bench 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

vectordb_bench/backend/clients/__init__.py CHANGED Viewed

@@ -27,6 +27,7 @@ class DB(Enum):
     Pinecone = "Pinecone"
     ElasticCloud = "ElasticCloud"
     QdrantCloud = "QdrantCloud"
+    QdrantLocal = "QdrantLocal"
     WeaviateCloud = "WeaviateCloud"
     PgVector = "PgVector"
     PgVectoRS = "PgVectoRS"
@@ -75,6 +76,11 @@ class DB(Enum):
             return QdrantCloud
+        if self == DB.QdrantLocal:
+            from .qdrant_local.qdrant_local import QdrantLocal
+            return QdrantLocal
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.weaviate_cloud import WeaviateCloud
@@ -201,6 +207,11 @@ class DB(Enum):
             return QdrantConfig
+        if self == DB.QdrantLocal:
+            from .qdrant_local.config import QdrantLocalConfig
+            return QdrantLocalConfig
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.config import WeaviateConfig
@@ -323,6 +334,11 @@ class DB(Enum):
             return QdrantIndexConfig
+        if self == DB.QdrantLocal:
+            from .qdrant_local.config import QdrantLocalIndexConfig
+            return QdrantLocalIndexConfig
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.config import WeaviateIndexConfig

vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py CHANGED Viewed

@@ -36,6 +36,7 @@ class AWSOpenSearch(VectorDB):
         self.vector_col_name = vector_col_name
         log.info(f"AWS_OpenSearch client config: {self.db_config}")
+        log.info(f"AWS_OpenSearch db case config : {self.case_config}")
         client = OpenSearch(**self.db_config)
         if drop_old:
             log.info(f"AWS_OpenSearch client drop old index: {self.index_name}")
@@ -43,6 +44,14 @@ class AWSOpenSearch(VectorDB):
             if is_existed:
                 client.indices.delete(index=self.index_name)
             self._create_index(client)
+        else:
+            is_existed = client.indices.exists(index=self.index_name)
+            if not is_existed:
+                self._create_index(client)
+                log.info(f"AWS_OpenSearch client create index: {self.index_name}")
+            self._update_ef_search_before_search(client)
+            self._load_graphs_to_memory(client)
     @classmethod
     def config_cls(cls) -> AWSOpenSearchConfig:
@@ -52,7 +61,17 @@ class AWSOpenSearch(VectorDB):
     def case_config_cls(cls, index_type: IndexType | None = None) -> AWSOpenSearchIndexConfig:
         return AWSOpenSearchIndexConfig
-    def _create_index(self, client: OpenSearch):
+    def _create_index(self, client: OpenSearch) -> None:
+        ef_search_value = (
+            self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
+        )
+        log.info(f"Creating index with ef_search: {ef_search_value}")
+        log.info(f"Creating index with number_of_replicas: {self.case_config.number_of_replicas}")
+        log.info(f"Creating index with engine: {self.case_config.engine}")
+        log.info(f"Creating index with metric type: {self.case_config.metric_type_name}")
+        log.info(f"All case_config parameters: {self.case_config.__dict__}")
         cluster_settings_body = {
             "persistent": {
                 "knn.algo_param.index_thread_qty": self.case_config.index_thread_qty,
@@ -64,18 +83,15 @@ class AWSOpenSearch(VectorDB):
             "index": {
                 "knn": True,
                 "number_of_shards": self.case_config.number_of_shards,
-                "number_of_replicas": 0,
+                "number_of_replicas": self.case_config.number_of_replicas,
                 "translog.flush_threshold_size": self.case_config.flush_threshold_size,
-                # Setting trans log threshold to 5GB
-                **(
-                    {"knn.algo_param.ef_search": self.case_config.ef_search}
-                    if self.case_config.engine == AWSOS_Engine.nmslib
-                    else {}
-                ),
+                "knn.advanced.approximate_threshold": "-1",
             },
             "refresh_interval": self.case_config.refresh_interval,
         }
+        settings["index"]["knn.algo_param.ef_search"] = ef_search_value
         mappings = {
+            "_source": {"excludes": [self.vector_col_name], "recovery_source_excludes": [self.vector_col_name]},
             "properties": {
                 **{categoryCol: {"type": "keyword"} for categoryCol in self.category_col_names},
                 self.vector_col_name: {
@@ -86,6 +102,8 @@ class AWSOpenSearch(VectorDB):
             },
         }
         try:
+            log.info(f"Creating index with settings: {settings}")
+            log.info(f"Creating index with mappings: {mappings}")
             client.indices.create(
                 index=self.index_name,
                 body={"settings": settings, "mappings": mappings},
@@ -112,6 +130,18 @@ class AWSOpenSearch(VectorDB):
         """Insert the embeddings to the opensearch."""
         assert self.client is not None, "should self.init() first"
+        num_clients = self.case_config.number_of_indexing_clients or 1
+        log.info(f"Number of indexing clients from case_config: {num_clients}")
+        if num_clients <= 1:
+            log.info("Using single client for data insertion")
+            return self._insert_with_single_client(embeddings, metadata)
+        log.info(f"Using {num_clients} parallel clients for data insertion")
+        return self._insert_with_multiple_clients(embeddings, metadata, num_clients)
+    def _insert_with_single_client(
+        self, embeddings: Iterable[list[float]], metadata: list[int]
+    ) -> tuple[int, Exception]:
         insert_data = []
         for i in range(len(embeddings)):
             insert_data.append(
@@ -129,7 +159,108 @@ class AWSOpenSearch(VectorDB):
         except Exception as e:
             log.warning(f"Failed to insert data: {self.index_name} error: {e!s}")
             time.sleep(10)
-            return self.insert_embeddings(embeddings, metadata)
+            return self._insert_with_single_client(embeddings, metadata)
+    def _insert_with_multiple_clients(
+        self, embeddings: Iterable[list[float]], metadata: list[int], num_clients: int
+    ) -> tuple[int, Exception]:
+        import concurrent.futures
+        from concurrent.futures import ThreadPoolExecutor
+        embeddings_list = list(embeddings)
+        chunk_size = max(1, len(embeddings_list) // num_clients)
+        chunks = []
+        for i in range(0, len(embeddings_list), chunk_size):
+            end = min(i + chunk_size, len(embeddings_list))
+            chunks.append((embeddings_list[i:end], metadata[i:end]))
+        clients = []
+        for _ in range(min(num_clients, len(chunks))):
+            client = OpenSearch(**self.db_config)
+            clients.append(client)
+        log.info(f"AWS_OpenSearch using {len(clients)} parallel clients for data insertion")
+        def insert_chunk(client_idx: int, chunk_idx: int):
+            chunk_embeddings, chunk_metadata = chunks[chunk_idx]
+            client = clients[client_idx]
+            insert_data = []
+            for i in range(len(chunk_embeddings)):
+                insert_data.append(
+                    {"index": {"_index": self.index_name, self.id_col_name: chunk_metadata[i]}},
+                )
+                insert_data.append({self.vector_col_name: chunk_embeddings[i]})
+            try:
+                resp = client.bulk(insert_data)
+                log.info(f"Client {client_idx} added {len(resp['items'])} documents")
+                return len(chunk_embeddings), None
+            except Exception as e:
+                log.warning(f"Client {client_idx} failed to insert data: {e!s}")
+                return 0, e
+        results = []
+        with ThreadPoolExecutor(max_workers=len(clients)) as executor:
+            futures = []
+            for chunk_idx in range(len(chunks)):
+                client_idx = chunk_idx % len(clients)
+                futures.append(executor.submit(insert_chunk, client_idx, chunk_idx))
+            for future in concurrent.futures.as_completed(futures):
+                count, error = future.result()
+                results.append((count, error))
+        from contextlib import suppress
+        for client in clients:
+            with suppress(Exception):
+                client.close()
+        total_count = sum(count for count, _ in results)
+        errors = [error for _, error in results if error is not None]
+        if errors:
+            log.warning("Some clients failed to insert data, retrying with single client")
+            time.sleep(10)
+            return self._insert_with_single_client(embeddings, metadata)
+        resp = self.client.indices.stats(self.index_name)
+        log.info(
+            f"""Total document count in index after parallel insertion:
+                {resp['_all']['primaries']['indexing']['index_total']}""",
+        )
+        return (total_count, None)
+    def _update_ef_search_before_search(self, client: OpenSearch):
+        ef_search_value = (
+            self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
+        )
+        try:
+            index_settings = client.indices.get_settings(index=self.index_name)
+            current_ef_search = (
+                index_settings.get(self.index_name, {})
+                .get("settings", {})
+                .get("index", {})
+                .get("knn.algo_param", {})
+                .get("ef_search")
+            )
+            if current_ef_search != str(ef_search_value):
+                log.info(f"Updating ef_search before search from {current_ef_search} to {ef_search_value}")
+                settings_body = {"index": {"knn.algo_param.ef_search": ef_search_value}}
+                client.indices.put_settings(index=self.index_name, body=settings_body)
+                log.info(f"Successfully updated ef_search to {ef_search_value} before search")
+            log.info(f"Current engine: {self.case_config.engine}")
+            log.info(f"Current metric_type: {self.case_config.metric_type_name}")
+        except Exception as e:
+            log.warning(f"Failed to update ef_search parameter before search: {e}")
     def search_embedding(
         self,
@@ -151,9 +282,18 @@ class AWSOpenSearch(VectorDB):
         body = {
             "size": k,
-            "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
+            "query": {
+                "knn": {
+                    self.vector_col_name: {
+                        "vector": query,
+                        "k": k,
+                        "method_parameters": {"ef_search": self.case_config.efSearch},
+                    }
+                }
+            },
             **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {}),
         }
         try:
             resp = self.client.search(
                 index=self.index_name,
@@ -162,6 +302,7 @@ class AWSOpenSearch(VectorDB):
                 _source=False,
                 docvalue_fields=[self.id_col_name],
                 stored_fields="_none_",
+                preference="_only_local" if self.case_config.number_of_shards == 1 else None,
             )
             log.debug(f"Search took: {resp['took']}")
             log.debug(f"Search shards: {resp['_shards']}")
@@ -173,6 +314,7 @@ class AWSOpenSearch(VectorDB):
     def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases."""
+        self._update_ef_search()
         # Call refresh first to ensure that all segments are created
         self._refresh_index()
         if self.case_config.force_merge_enabled:
@@ -182,7 +324,22 @@ class AWSOpenSearch(VectorDB):
         # Call refresh again to ensure that the index is ready after force merge.
         self._refresh_index()
         # ensure that all graphs are loaded in memory and ready for search
-        self._load_graphs_to_memory()
+        self._load_graphs_to_memory(self.client)
+    def _update_ef_search(self):
+        ef_search_value = (
+            self.case_config.ef_search if self.case_config.ef_search is not None else self.case_config.efSearch
+        )
+        log.info(f"Updating ef_search parameter to: {ef_search_value}")
+        settings_body = {"index": {"knn.algo_param.ef_search": ef_search_value}}
+        try:
+            self.client.indices.put_settings(index=self.index_name, body=settings_body)
+            log.info(f"Successfully updated ef_search to {ef_search_value}")
+            log.info(f"Current engine: {self.case_config.engine}")
+            log.info(f"Current metric_type: {self.case_config.metric_type}")
+        except Exception as e:
+            log.warning(f"Failed to update ef_search parameter: {e}")
     def _update_replicas(self):
         index_settings = self.client.indices.get_settings(index=self.index_name)
@@ -200,7 +357,7 @@ class AWSOpenSearch(VectorDB):
         while True:
             res = self.client.cat.indices(index=self.index_name, h="health", format="json")
             health = res[0]["health"]
-            if health != "green":
+            if health == "green":
                 break
             log.info(f"The index {self.index_name} has health : {health} and is not green. Retrying")
             time.sleep(SECONDS_WAITING_FOR_REPLICAS_TO_BE_ENABLED_SEC)
@@ -228,8 +385,16 @@ class AWSOpenSearch(VectorDB):
             "persistent": {"knn.algo_param.index_thread_qty": self.case_config.index_thread_qty_during_force_merge}
         }
         self.client.cluster.put_settings(cluster_settings_body)
+        log.info("Updating the graph threshold to ensure that during merge we can do graph creation.")
+        output = self.client.indices.put_settings(
+            index=self.index_name, body={"index.knn.advanced.approximate_threshold": "0"}
+        )
+        log.info(f"response of updating setting is: {output}")
         log.debug(f"Starting force merge for index {self.index_name}")
-        force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
+        segments = self.case_config.number_of_segments
+        force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments={segments}&wait_for_completion=false"
         force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
         while True:
             time.sleep(WAITING_FOR_FORCE_MERGE_SEC)
@@ -238,8 +403,8 @@ class AWSOpenSearch(VectorDB):
                 break
         log.debug(f"Completed force merge for index {self.index_name}")
-    def _load_graphs_to_memory(self):
+    def _load_graphs_to_memory(self, client: OpenSearch):
         if self.case_config.engine != AWSOS_Engine.lucene:
             log.info("Calling warmup API to load graphs into memory")
             warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
-            self.client.transport.perform_request("GET", warmup_endpoint)
+            client.transport.perform_request("GET", warmup_endpoint)

vectordb_bench/backend/clients/aws_opensearch/cli.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import logging
 from typing import Annotated, TypedDict, Unpack
 import click
@@ -5,18 +6,21 @@ from pydantic import SecretStr
 from ....cli.cli import (
     CommonTypedDict,
-    HNSWFlavor2,
+    HNSWFlavor1,
     cli,
     click_parameter_decorators_from_typed_dict,
     run,
 )
 from .. import DB
+from .config import AWSOS_Engine, AWSOSQuantization
+log = logging.getLogger(__name__)
 class AWSOpenSearchTypedDict(TypedDict):
     host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
-    port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
-    user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
+    port: Annotated[int, click.option("--port", type=int, default=80, help="Db Port")]
+    user: Annotated[str, click.option("--user", type=str, help="Db User")]
     password: Annotated[str, click.option("--password", type=str, help="Db password")]
     number_of_shards: Annotated[
         int,
@@ -38,23 +42,23 @@ class AWSOpenSearchTypedDict(TypedDict):
         ),
     ]
-    index_thread_qty_during_force_merge: Annotated[
-        int,
+    engine: Annotated[
+        str,
         click.option(
-            "--index-thread-qty-during-force-merge",
-            type=int,
-            help="Thread count during force merge operations",
-            default=4,
+            "--engine",
+            type=click.Choice(["nmslib", "faiss", "lucene"], case_sensitive=False),
+            help="HNSW algorithm implementation to use",
+            default="faiss",
         ),
     ]
-    number_of_indexing_clients: Annotated[
-        int,
+    metric_type: Annotated[
+        str,
         click.option(
-            "--number-of-indexing-clients",
-            type=int,
-            help="Number of concurrent indexing clients",
-            default=1,
+            "--metric-type",
+            type=click.Choice(["l2", "cosine", "ip"], case_sensitive=False),
+            help="Distance metric type for vector similarity",
+            default="l2",
         ),
     ]
@@ -64,26 +68,26 @@ class AWSOpenSearchTypedDict(TypedDict):
     ]
     refresh_interval: Annotated[
-        int,
+        str,
         click.option(
             "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
         ),
     ]
     force_merge_enabled: Annotated[
-        int,
+        bool,
         click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
     ]
     flush_threshold_size: Annotated[
-        int,
+        str,
         click.option(
             "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
         ),
     ]
     cb_threshold: Annotated[
-        int,
+        str,
         click.option(
             "--cb-threshold",
             type=str,
@@ -92,8 +96,30 @@ class AWSOpenSearchTypedDict(TypedDict):
         ),
     ]
+    quantization_type: Annotated[
+        str | None,
+        click.option(
+            "--quantization-type",
+            type=click.Choice(["fp32", "fp16"]),
+            help="quantization type for vectors (in index)",
+            default="fp32",
+            required=False,
+        ),
+    ]
+    engine: Annotated[
+        str | None,
+        click.option(
+            "--engine",
+            type=click.Choice(["faiss", "lucene"]),
+            help="quantization type for vectors (in index)",
+            default="faiss",
+            required=False,
+        ),
+    ]
-class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
+class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor1): ...
 @cli.command()
@@ -117,9 +143,13 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
             refresh_interval=parameters["refresh_interval"],
             force_merge_enabled=parameters["force_merge_enabled"],
             flush_threshold_size=parameters["flush_threshold_size"],
-            number_of_indexing_clients=parameters["number_of_indexing_clients"],
             index_thread_qty_during_force_merge=parameters["index_thread_qty_during_force_merge"],
             cb_threshold=parameters["cb_threshold"],
+            efConstruction=parameters["ef_construction"],
+            efSearch=parameters["ef_runtime"],
+            M=parameters["m"],
+            engine=AWSOS_Engine(parameters["engine"]),
+            quantization_type=AWSOSQuantization(parameters["quantization_type"]),
         ),
         **parameters,
     )

vectordb_bench/backend/clients/aws_opensearch/config.py CHANGED Viewed

@@ -10,17 +10,21 @@ log = logging.getLogger(__name__)
 class AWSOpenSearchConfig(DBConfig, BaseModel):
     host: str = ""
-    port: int = 443
+    port: int = 80
     user: str = ""
     password: SecretStr = ""
     def to_dict(self) -> dict:
+        use_ssl = self.port == 443
+        http_auth = (
+            (self.user, self.password.get_secret_value()) if len(self.user) != 0 and len(self.password) != 0 else ()
+        )
         return {
             "hosts": [{"host": self.host, "port": self.port}],
-            "http_auth": (self.user, self.password.get_secret_value()),
-            "use_ssl": True,
+            "http_auth": http_auth,
+            "use_ssl": use_ssl,
             "http_compress": True,
-            "verify_certs": True,
+            "verify_certs": use_ssl,
             "ssl_assert_hostname": False,
             "ssl_show_warn": False,
             "timeout": 600,
@@ -28,16 +32,22 @@ class AWSOpenSearchConfig(DBConfig, BaseModel):
 class AWSOS_Engine(Enum):
-    nmslib = "nmslib"
     faiss = "faiss"
-    lucene = "Lucene"
+    lucene = "lucene"
+class AWSOSQuantization(Enum):
+    fp32 = "fp32"
+    fp16 = "fp16"
 class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     metric_type: MetricType = MetricType.L2
     engine: AWSOS_Engine = AWSOS_Engine.faiss
     efConstruction: int = 256
-    efSearch: int = 256
+    ef_search: int = 200
+    engine_name: str | None = None
+    metric_type_name: str | None = None
     M: int = 16
     index_thread_qty: int | None = 4
     number_of_shards: int | None = 1
@@ -46,31 +56,44 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     refresh_interval: str | None = "60s"
     force_merge_enabled: bool | None = True
     flush_threshold_size: str | None = "5120mb"
-    number_of_indexing_clients: int | None = 1
     index_thread_qty_during_force_merge: int
     cb_threshold: str | None = "50%"
+    quantization_type: AWSOSQuantization = AWSOSQuantization.fp32
     def parse_metric(self) -> str:
+        log.info(f"User specified metric_type: {self.metric_type_name}")
+        self.metric_type = MetricType[self.metric_type_name.upper()]
         if self.metric_type == MetricType.IP:
             return "innerproduct"
         if self.metric_type == MetricType.COSINE:
-            if self.engine == AWSOS_Engine.faiss:
-                log.info(
-                    "Using innerproduct because faiss doesn't support cosine as metric type for Opensearch",
-                )
-                return "innerproduct"
             return "cosinesimil"
+        if self.metric_type == MetricType.L2:
+            log.info("Using l2 as specified by user")
+            return "l2"
         return "l2"
     def index_param(self) -> dict:
+        log.info(f"Using engine: {self.engine} for index creation")
+        log.info(f"Using metric_type: {self.metric_type_name} for index creation")
+        log.info(f"Resulting space_type: {self.parse_metric()} for index creation")
+        parameters = {"ef_construction": self.efConstruction, "m": self.M}
+        if self.engine == AWSOS_Engine.faiss and self.faiss_use_fp16:
+            parameters["encoder"] = {"name": "sq", "parameters": {"type": "fp16"}}
         return {
             "name": "hnsw",
-            "space_type": self.parse_metric(),
             "engine": self.engine.value,
             "parameters": {
                 "ef_construction": self.efConstruction,
                 "m": self.M,
                 "ef_search": self.efSearch,
+                **(
+                    {"encoder": {"name": "sq", "parameters": {"type": self.quantization_type.fp16.value}}}
+                    if self.quantization_type is not AWSOSQuantization.fp32
+                    else {}
+                ),
             },
         }

vectordb_bench/backend/clients/lancedb/cli.py CHANGED Viewed

@@ -58,10 +58,46 @@ def LanceDBAutoIndex(**parameters: Unpack[LanceDBTypedDict]):
     )
+class LanceDBIVFPQTypedDict(CommonTypedDict, LanceDBTypedDict):
+    num_partitions: Annotated[
+        int,
+        click.option(
+            "--num-partitions",
+            type=int,
+            default=0,
+            help="Number of partitions for IVFPQ index, unset = use LanceDB default",
+        ),
+    ]
+    num_sub_vectors: Annotated[
+        int,
+        click.option(
+            "--num-sub-vectors",
+            type=int,
+            default=0,
+            help="Number of sub-vectors for IVFPQ index, unset = use LanceDB default",
+        ),
+    ]
+    nbits: Annotated[
+        int,
+        click.option(
+            "--nbits",
+            type=int,
+            default=8,
+            help="Number of bits for IVFPQ index (must be 4 or 8), unset = use LanceDB default",
+        ),
+    ]
+    nprobes: Annotated[
+        int,
+        click.option(
+            "--nprobes", type=int, default=0, help="Number of probes for IVFPQ search, unset = use LanceDB default"
+        ),
+    ]
 @cli.command()
-@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
-def LanceDBIVFPQ(**parameters: Unpack[LanceDBTypedDict]):
-    from .config import LanceDBConfig, _lancedb_case_config
+@click_parameter_decorators_from_typed_dict(LanceDBIVFPQTypedDict)
+def LanceDBIVFPQ(**parameters: Unpack[LanceDBIVFPQTypedDict]):
+    from .config import LanceDBConfig, LanceDBIndexConfig
     run(
         db=DB.LanceDB,
@@ -70,15 +106,29 @@ def LanceDBIVFPQ(**parameters: Unpack[LanceDBTypedDict]):
             uri=parameters["uri"],
             token=SecretStr(parameters["token"]) if parameters.get("token") else None,
         ),
-        db_case_config=_lancedb_case_config.get(IndexType.IVFPQ)(),
+        db_case_config=LanceDBIndexConfig(
+            index=IndexType.IVFPQ,
+            num_partitions=parameters["num_partitions"],
+            num_sub_vectors=parameters["num_sub_vectors"],
+            nbits=parameters["nbits"],
+            nprobes=parameters["nprobes"],
+        ),
         **parameters,
     )
+class LanceDBHNSWTypedDict(CommonTypedDict, LanceDBTypedDict):
+    m: Annotated[int, click.option("--m", type=int, default=0, help="HNSW parameter m")]
+    ef_construction: Annotated[
+        int, click.option("--ef-construction", type=int, default=0, help="HNSW parameter ef_construction")
+    ]
+    ef: Annotated[int, click.option("--ef", type=int, default=0, help="HNSW search parameter ef")]
 @cli.command()
-@click_parameter_decorators_from_typed_dict(LanceDBTypedDict)
-def LanceDBHNSW(**parameters: Unpack[LanceDBTypedDict]):
-    from .config import LanceDBConfig, _lancedb_case_config
+@click_parameter_decorators_from_typed_dict(LanceDBHNSWTypedDict)
+def LanceDBHNSW(**parameters: Unpack[LanceDBHNSWTypedDict]):
+    from .config import LanceDBConfig, LanceDBHNSWIndexConfig
     run(
         db=DB.LanceDB,
@@ -87,6 +137,10 @@ def LanceDBHNSW(**parameters: Unpack[LanceDBTypedDict]):
             uri=parameters["uri"],
             token=SecretStr(parameters["token"]) if parameters.get("token") else None,
         ),
-        db_case_config=_lancedb_case_config.get(IndexType.HNSW)(),
+        db_case_config=LanceDBHNSWIndexConfig(
+            m=parameters["m"],
+            ef_construction=parameters["ef_construction"],
+            ef=parameters["ef"],
+        ),
         **parameters,
     )

vectordb-bench 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl

vectordb-bench 0.0.29__py3-none-any.whl → 0.0.30__py3-none-any.whl