vectordb-bench 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +75 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +111 -70
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +5 -9
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +38 -36
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +18 -19
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +94 -58
- vectordb_bench/backend/clients/test/cli.py +1 -2
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +4 -5
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +30 -18
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +85 -34
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +51 -23
- vectordb_bench/backend/runner/serial_runner.py +91 -48
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -72
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +56 -26
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.20.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0

vectordb_bench/backend/clients/__init__.py

@@ -1,12 +1,12 @@
 from enum import Enum
-
+
 from .api import (
-    VectorDB,
-    DBConfig,
     DBCaseConfig,
+    DBConfig,
     EmptyDBCaseConfig,
     IndexType,
     MetricType,
+    VectorDB,
 )
 
 
@@ -41,200 +41,255 @@ class DB(Enum):
     Test = "test"
     AliyunOpenSearch = "AliyunOpenSearch"
 
-
     @property
-    def init_cls(self) ->
+    def init_cls(self) -> type[VectorDB]:  # noqa: PLR0911, PLR0912
        """Import while in use"""
        if self == DB.Milvus:
            from .milvus.milvus import Milvus
+
            return Milvus
 
        if self == DB.ZillizCloud:
            from .zilliz_cloud.zilliz_cloud import ZillizCloud
+
            return ZillizCloud
 
        if self == DB.Pinecone:
            from .pinecone.pinecone import Pinecone
+
            return Pinecone
 
        if self == DB.ElasticCloud:
            from .elastic_cloud.elastic_cloud import ElasticCloud
+
            return ElasticCloud
 
        if self == DB.QdrantCloud:
            from .qdrant_cloud.qdrant_cloud import QdrantCloud
+
            return QdrantCloud
 
        if self == DB.WeaviateCloud:
            from .weaviate_cloud.weaviate_cloud import WeaviateCloud
+
            return WeaviateCloud
 
        if self == DB.PgVector:
            from .pgvector.pgvector import PgVector
+
            return PgVector
 
        if self == DB.PgVectoRS:
            from .pgvecto_rs.pgvecto_rs import PgVectoRS
+
            return PgVectoRS
-
+
        if self == DB.PgVectorScale:
            from .pgvectorscale.pgvectorscale import PgVectorScale
+
            return PgVectorScale
 
        if self == DB.PgDiskANN:
            from .pgdiskann.pgdiskann import PgDiskANN
+
            return PgDiskANN
 
        if self == DB.Redis:
            from .redis.redis import Redis
+
            return Redis
-
+
        if self == DB.MemoryDB:
            from .memorydb.memorydb import MemoryDB
+
            return MemoryDB
 
        if self == DB.Chroma:
            from .chroma.chroma import ChromaClient
+
            return ChromaClient
 
        if self == DB.AWSOpenSearch:
            from .aws_opensearch.aws_opensearch import AWSOpenSearch
+
            return AWSOpenSearch
-
+
        if self == DB.AlloyDB:
            from .alloydb.alloydb import AlloyDB
+
            return AlloyDB
 
        if self == DB.AliyunElasticsearch:
            from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
+
            return AliyunElasticsearch
 
        if self == DB.AliyunOpenSearch:
            from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
+
            return AliyunOpenSearch
 
+        msg = f"Unknown DB: {self.name}"
+        raise ValueError(msg)
+
    @property
-    def config_cls(self) ->
+    def config_cls(self) -> type[DBConfig]:  # noqa: PLR0911, PLR0912
        """Import while in use"""
        if self == DB.Milvus:
            from .milvus.config import MilvusConfig
+
            return MilvusConfig
 
        if self == DB.ZillizCloud:
            from .zilliz_cloud.config import ZillizCloudConfig
+
            return ZillizCloudConfig
 
        if self == DB.Pinecone:
            from .pinecone.config import PineconeConfig
+
            return PineconeConfig
 
        if self == DB.ElasticCloud:
            from .elastic_cloud.config import ElasticCloudConfig
+
            return ElasticCloudConfig
 
        if self == DB.QdrantCloud:
            from .qdrant_cloud.config import QdrantConfig
+
            return QdrantConfig
 
        if self == DB.WeaviateCloud:
            from .weaviate_cloud.config import WeaviateConfig
+
            return WeaviateConfig
 
        if self == DB.PgVector:
            from .pgvector.config import PgVectorConfig
+
            return PgVectorConfig
 
        if self == DB.PgVectoRS:
            from .pgvecto_rs.config import PgVectoRSConfig
+
            return PgVectoRSConfig
 
        if self == DB.PgVectorScale:
            from .pgvectorscale.config import PgVectorScaleConfig
+
            return PgVectorScaleConfig
 
        if self == DB.PgDiskANN:
            from .pgdiskann.config import PgDiskANNConfig
+
            return PgDiskANNConfig
 
        if self == DB.Redis:
            from .redis.config import RedisConfig
+
            return RedisConfig
-
+
        if self == DB.MemoryDB:
            from .memorydb.config import MemoryDBConfig
+
            return MemoryDBConfig
 
        if self == DB.Chroma:
            from .chroma.config import ChromaConfig
+
            return ChromaConfig
 
        if self == DB.AWSOpenSearch:
            from .aws_opensearch.config import AWSOpenSearchConfig
+
            return AWSOpenSearchConfig
-
+
        if self == DB.AlloyDB:
            from .alloydb.config import AlloyDBConfig
+
            return AlloyDBConfig
 
        if self == DB.AliyunElasticsearch:
            from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
+
            return AliyunElasticsearchConfig
 
        if self == DB.AliyunOpenSearch:
            from .aliyun_opensearch.config import AliyunOpenSearchConfig
+
            return AliyunOpenSearchConfig
 
-
+        msg = f"Unknown DB: {self.name}"
+        raise ValueError(msg)
+
+    def case_config_cls(  # noqa: PLR0911
+        self,
+        index_type: IndexType | None = None,
+    ) -> type[DBCaseConfig]:
        if self == DB.Milvus:
            from .milvus.config import _milvus_case_config
+
            return _milvus_case_config.get(index_type)
 
        if self == DB.ZillizCloud:
            from .zilliz_cloud.config import AutoIndexConfig
+
            return AutoIndexConfig
 
        if self == DB.ElasticCloud:
            from .elastic_cloud.config import ElasticCloudIndexConfig
+
            return ElasticCloudIndexConfig
 
        if self == DB.QdrantCloud:
            from .qdrant_cloud.config import QdrantIndexConfig
+
            return QdrantIndexConfig
 
        if self == DB.WeaviateCloud:
            from .weaviate_cloud.config import WeaviateIndexConfig
+
            return WeaviateIndexConfig
 
        if self == DB.PgVector:
            from .pgvector.config import _pgvector_case_config
+
            return _pgvector_case_config.get(index_type)
 
        if self == DB.PgVectoRS:
            from .pgvecto_rs.config import _pgvecto_rs_case_config
+
            return _pgvecto_rs_case_config.get(index_type)
 
        if self == DB.AWSOpenSearch:
            from .aws_opensearch.config import AWSOpenSearchIndexConfig
+
            return AWSOpenSearchIndexConfig
 
        if self == DB.PgVectorScale:
            from .pgvectorscale.config import _pgvectorscale_case_config
+
            return _pgvectorscale_case_config.get(index_type)
 
        if self == DB.PgDiskANN:
            from .pgdiskann.config import _pgdiskann_case_config
+
            return _pgdiskann_case_config.get(index_type)
-
+
        if self == DB.AlloyDB:
            from .alloydb.config import _alloydb_case_config
+
            return _alloydb_case_config.get(index_type)
 
        if self == DB.AliyunElasticsearch:
            from .elastic_cloud.config import ElasticCloudIndexConfig
+
            return ElasticCloudIndexConfig
 
        if self == DB.AliyunOpenSearch:
            from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
+
            return AliyunOpenSearchIndexConfig
 
        # DB.Pinecone, DB.Chroma, DB.Redis
@@ -242,5 +297,11 @@ class DB(Enum):
 
 
 __all__ = [
-    "DB",
+    "DB",
+    "DBCaseConfig",
+    "DBConfig",
+    "EmptyDBCaseConfig",
+    "IndexType",
+    "MetricType",
+    "VectorDB",
 ]
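
The lazy-import pattern above means a caller only pays the import cost of the backend it actually selects, and 0.0.20 now raises a ValueError instead of silently falling through when an enum member is unhandled. A minimal usage sketch (illustrative only, not part of the diff; it assumes the package is installed as vectordb_bench):

from vectordb_bench.backend.clients import DB

db = DB.ZillizCloud
client_cls = db.init_cls        # triggers the deferred `from .zilliz_cloud.zilliz_cloud import ZillizCloud`
config_cls = db.config_cls      # triggers the deferred `from .zilliz_cloud.config import ZillizCloudConfig`
case_cfg_cls = db.case_config_cls()  # AutoIndexConfig for ZillizCloud; index_type defaults to None

print(client_cls.__name__, config_cls.__name__, case_cfg_cls.__name__)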

vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py

@@ -1,5 +1,5 @@
-from ..elastic_cloud.elastic_cloud import ElasticCloud
 from ..elastic_cloud.config import ElasticCloudIndexConfig
+from ..elastic_cloud.elastic_cloud import ElasticCloud
 
 
 class AliyunElasticsearch(ElasticCloud):
@@ -24,4 +24,3 @@ class AliyunElasticsearch(ElasticCloud):
            drop_old=drop_old,
            **kwargs,
        )
-

vectordb_bench/backend/clients/aliyun_elasticsearch/config.py

@@ -1,7 +1,6 @@
-from
-from pydantic import SecretStr, BaseModel
+from pydantic import BaseModel, SecretStr
 
-from ..api import DBConfig
+from ..api import DBConfig
 
 
 class AliyunElasticsearchConfig(DBConfig, BaseModel):
@@ -14,6 +13,6 @@ class AliyunElasticsearchConfig(DBConfig, BaseModel):
 
    def to_dict(self) -> dict:
        return {
-            "hosts": [{
+            "hosts": [{"scheme": self.scheme, "host": self.host, "port": self.port}],
            "basic_auth": (self.user, self.password.get_secret_value()),
        }
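
For clarity, a stand-alone sketch of the dict shape that AliyunElasticsearchConfig.to_dict() now builds with the single-line "hosts" entry. It uses a hypothetical stand-in model with example values, not the package's own class:

from pydantic import BaseModel, SecretStr


class EsConfigSketch(BaseModel):
    # Example values only; the real config reads these from user input.
    scheme: str = "http"
    host: str = "es.example.internal"
    port: int = 9200
    user: str = "elastic"
    password: SecretStr = SecretStr("secret")

    def to_dict(self) -> dict:
        # Same shape as AliyunElasticsearchConfig.to_dict() after the change above.
        return {
            "hosts": [{"scheme": self.scheme, "host": self.host, "port": self.port}],
            "basic_auth": (self.user, self.password.get_secret_value()),
        }


print(EsConfigSketch().to_dict())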

vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py

@@ -1,32 +1,32 @@
 import json
 import logging
-from contextlib import contextmanager
 import time
+from contextlib import contextmanager
 
+from alibabacloud_ha3engine_vector import client, models
 from alibabacloud_ha3engine_vector.models import QueryRequest
-
-from ..api import VectorDB, MetricType
-from .config import AliyunOpenSearchIndexConfig
-
-from alibabacloud_searchengine20211025.client import Client as searchengineClient
 from alibabacloud_searchengine20211025 import models as searchengine_models
+from alibabacloud_searchengine20211025.client import Client as searchengineClient
 from alibabacloud_tea_openapi import models as open_api_models
-
+
+from ..api import MetricType, VectorDB
+from .config import AliyunOpenSearchIndexConfig
 
 log = logging.getLogger(__name__)
 
 ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB
 ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
 
+
 class AliyunOpenSearch(VectorDB):
    def __init__(
-
-
-
-
-
-
-
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: AliyunOpenSearchIndexConfig,
+        collection_name: str = "VectorDBBenchCollection",
+        drop_old: bool = False,
+        **kwargs,
    ):
        self.control_client = None
        self.dim = dim
@@ -41,14 +41,17 @@ class AliyunOpenSearch(VectorDB):
        self._index_name = "vector_idx"
 
        self.batch_size = int(
-            min(
+            min(
+                ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25),
+                ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH,
+            ),
        )
 
        log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
        control_config = open_api_models.Config(
            access_key_id=self.db_config["ak"],
            access_key_secret=self.db_config["sk"],
-            endpoint=self.db_config["control_host"]
+            endpoint=self.db_config["control_host"],
        )
        self.control_client = searchengineClient(control_config)
 
@@ -67,7 +70,7 @@ class AliyunOpenSearch(VectorDB):
        create_table_request.field_schema = {
            self._primary_field: "INT64",
            self._vector_field: "MULTI_FLOAT",
-            self._scalar_field: "INT64"
+            self._scalar_field: "INT64",
        }
        vector_index = searchengine_models.ModifyTableRequestVectorIndex()
        vector_index.index_name = self._index_name
@@ -77,8 +80,25 @@ class AliyunOpenSearch(VectorDB):
        vector_index.vector_index_type = "HNSW"
 
        advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
-
-
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
        vector_index.advance_params = advance_params
        create_table_request.vector_index = [vector_index]
 
@@ -88,7 +108,7 @@ class AliyunOpenSearch(VectorDB):
        except Exception as error:
            log.info(error.message)
            log.info(error.data.get("Recommend"))
-            log.info(f"Failed to create index: error: {
+            log.info(f"Failed to create index: error: {error!s}")
            raise error from None
 
        # check if index create success
@@ -102,22 +122,22 @@ class AliyunOpenSearch(VectorDB):
            log.info(f"begin to {retry_times} times get table")
            retry_times += 1
            response = client.get_table(self.instance_id, self.collection_name)
-            if response.body.result.status ==
+            if response.body.result.status == "IN_USE":
                log.info(f"{self.collection_name} table begin to use.")
                return
 
    def _index_exists(self, client: searchengineClient) -> bool:
        try:
            client.get_table(self.instance_id, self.collection_name)
-
-
-            log.info(f'get table from searchengine error')
-            log.info(error.message)
+        except Exception as err:
+            log.warning(f"get table from searchengine error, err={err}")
            return False
+        else:
+            return True
 
    # check if index build success, Insert the embeddings to the vector database after index build success
    def _index_build_success(self, client: searchengineClient) -> None:
-        log.info(
+        log.info("begin to check if table build success.")
        time.sleep(50)
 
        retry_times = 0
@@ -139,9 +159,9 @@ class AliyunOpenSearch(VectorDB):
                    cur_fsm = fsm
                    break
            if cur_fsm is None:
-
+                log.warning("no build index fsm")
                return
-            if "
+            if cur_fsm["status"] == "success":
                return
 
    def _modify_index(self, client: searchengineClient) -> None:
@@ -154,7 +174,7 @@ class AliyunOpenSearch(VectorDB):
        modify_table_request.field_schema = {
            self._primary_field: "INT64",
            self._vector_field: "MULTI_FLOAT",
-            self._scalar_field: "INT64"
+            self._scalar_field: "INT64",
        }
        vector_index = searchengine_models.ModifyTableRequestVectorIndex()
        vector_index.index_name = self._index_name
@@ -163,19 +183,41 @@ class AliyunOpenSearch(VectorDB):
        vector_index.vector_field = self._vector_field
        vector_index.vector_index_type = "HNSW"
        advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
-
-
+
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
        vector_index.advance_params = advance_params
 
        modify_table_request.vector_index = [vector_index]
 
        try:
-            response = client.modify_table(
+            response = client.modify_table(
+                self.instance_id,
+                self.collection_name,
+                modify_table_request,
+            )
            log.info(f"modify table success: {response.body}")
        except Exception as error:
            log.info(error.message)
            log.info(error.data.get("Recommend"))
-            log.info(f"Failed to modify index: error: {
+            log.info(f"Failed to modify index: error: {error!s}")
            raise error from None
 
    # check if modify index & delete data fsm success
@@ -185,15 +227,14 @@ class AliyunOpenSearch(VectorDB):
    def _get_total_count(self):
        try:
            response = self.client.stats(self.collection_name)
+        except Exception as e:
+            log.warning(f"Error querying index: {e}")
+        else:
            body = json.loads(response.body)
            log.info(f"stats info: {response.body}")
 
            if "result" in body and "totalDocCount" in body.get("result"):
                return body.get("result").get("totalDocCount")
-            else:
-                return 0
-        except Exception as e:
-            print(f"Error querying index: {e}")
            return 0
 
    @contextmanager
@@ -203,21 +244,20 @@ class AliyunOpenSearch(VectorDB):
            endpoint=self.db_config["host"],
            protocol="http",
            access_user_name=self.db_config["user"],
-            access_pass_word=self.db_config["password"]
+            access_pass_word=self.db_config["password"],
        )
 
        self.client = client.Client(config)
 
        yield
-        # self.client.transport.close()
        self.client = None
        del self.client
 
    def insert_embeddings(
-
-
-
-
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs,
    ) -> tuple[int, Exception]:
        """Insert the embeddings to the opensearch."""
        assert self.client is not None, "should self.init() first"
@@ -226,25 +266,24 @@ class AliyunOpenSearch(VectorDB):
 
        try:
            for batch_start_offset in range(0, len(embeddings), self.batch_size):
-                batch_end_offset = min(
-                    batch_start_offset + self.batch_size, len(embeddings)
-                )
+                batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
                documents = []
                for i in range(batch_start_offset, batch_end_offset):
-
+                    document_fields = {
                        self._primary_field: metadata[i],
                        self._vector_field: embeddings[i],
                        self._scalar_field: metadata[i],
-                        "ops_build_channel": "inc"
-                    }
-                    document = {
-                        "fields": documentFields,
-                        "cmd": "add"
+                        "ops_build_channel": "inc",
                    }
+                    document = {"fields": document_fields, "cmd": "add"}
                    documents.append(document)
 
-
-                self.client.push_documents(
+                push_doc_req = models.PushDocumentsRequest({}, documents)
+                self.client.push_documents(
+                    self.collection_name,
+                    self._primary_field,
+                    push_doc_req,
+                )
                insert_count += batch_end_offset - batch_start_offset
        except Exception as e:
            log.info(f"Failed to insert data: {e}")
@@ -252,33 +291,36 @@ class AliyunOpenSearch(VectorDB):
        return (insert_count, None)
 
    def search_embedding(
-
-
-
-
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict | None = None,
    ) -> list[int]:
        assert self.client is not None, "should self.init() first"
-        search_params =
+        search_params = '{"proxima.hnsw.searcher.ef":' + str(self.case_config.ef_search) + "}"
 
        os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
 
        try:
-            request = QueryRequest(
-
-
-
+            request = QueryRequest(
+                table_name=self.collection_name,
+                vector=query,
+                top_k=k,
+                search_params=search_params,
+                filter=os_filter,
+            )
            result = self.client.query(request)
        except Exception as e:
            log.info(f"Error querying index: {e}")
-            raise e
-
-
-
+            raise e from e
+        else:
+            res = json.loads(result.body)
+            return [one_res["id"] for one_res in res["result"]]
 
    def need_normalize_cosine(self) -> bool:
        """Wheather this database need to normalize dataset to support COSINE"""
        if self.case_config.metric_type == MetricType.COSINE:
-            log.info(
+            log.info("cosine dataset need normalize.")
            return True
 
        return False
@@ -296,9 +338,8 @@ class AliyunOpenSearch(VectorDB):
            total_count = self._get_total_count()
            # check if the data is inserted
            if total_count == data_size:
-                log.info(
+                log.info("optimize table finish.")
                return
 
    def ready_to_load(self):
        """ready_to_load will be called before load in load cases."""
-        pass