vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +56 -46
  5. vectordb_bench/backend/clients/__init__.py +101 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +52 -35
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +8 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +62 -80
  26. vectordb_bench/backend/clients/milvus/config.py +31 -7
  27. vectordb_bench/backend/clients/milvus/milvus.py +23 -26
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +51 -23
  62. vectordb_bench/backend/runner/read_write_runner.py +140 -46
  63. vectordb_bench/backend/runner/serial_runner.py +99 -50
  64. vectordb_bench/backend/runner/util.py +4 -19
  65. vectordb_bench/backend/task_runner.py +95 -74
  66. vectordb_bench/backend/utils.py +17 -9
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.18.dist-info/RECORD +0 -131
  103. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/__init__.py
@@ -1,12 +1,12 @@
 from enum import Enum
-from typing import Type
+
 from .api import (
-    VectorDB,
-    DBConfig,
     DBCaseConfig,
+    DBConfig,
     EmptyDBCaseConfig,
     IndexType,
     MetricType,
+    VectorDB,
 )
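This hunk drops the `typing.Type` import because the annotations below switch to the builtin generic `type[...]`, available since Python 3.9 (PEP 585), and re-sorts the `.api` imports alphabetically. A minimal sketch of the annotation change:

    from typing import Type

    def old_factory() -> Type["VectorDB"]: ...  # pre-3.9 spelling, needs the import

    def new_factory() -> type["VectorDB"]: ...  # builtin generic, no import needed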
 
vectordb_bench/backend/clients/__init__.py
@@ -37,184 +37,271 @@ class DB(Enum):
     MemoryDB = "MemoryDB"
     Chroma = "Chroma"
     AWSOpenSearch = "OpenSearch"
+    AliyunElasticsearch = "AliyunElasticsearch"
     Test = "test"
-
+    AliyunOpenSearch = "AliyunOpenSearch"
 
     @property
-    def init_cls(self) -> Type[VectorDB]:
+    def init_cls(self) -> type[VectorDB]:  # noqa: PLR0911, PLR0912
         """Import while in use"""
         if self == DB.Milvus:
             from .milvus.milvus import Milvus
+
             return Milvus
 
         if self == DB.ZillizCloud:
             from .zilliz_cloud.zilliz_cloud import ZillizCloud
+
             return ZillizCloud
 
         if self == DB.Pinecone:
             from .pinecone.pinecone import Pinecone
+
             return Pinecone
 
         if self == DB.ElasticCloud:
             from .elastic_cloud.elastic_cloud import ElasticCloud
+
             return ElasticCloud
 
         if self == DB.QdrantCloud:
             from .qdrant_cloud.qdrant_cloud import QdrantCloud
+
             return QdrantCloud
 
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.weaviate_cloud import WeaviateCloud
+
             return WeaviateCloud
 
         if self == DB.PgVector:
             from .pgvector.pgvector import PgVector
+
             return PgVector
 
         if self == DB.PgVectoRS:
             from .pgvecto_rs.pgvecto_rs import PgVectoRS
+
             return PgVectoRS
-
+
         if self == DB.PgVectorScale:
             from .pgvectorscale.pgvectorscale import PgVectorScale
+
             return PgVectorScale
 
         if self == DB.PgDiskANN:
             from .pgdiskann.pgdiskann import PgDiskANN
+
             return PgDiskANN
 
         if self == DB.Redis:
             from .redis.redis import Redis
+
             return Redis
-
+
         if self == DB.MemoryDB:
             from .memorydb.memorydb import MemoryDB
+
             return MemoryDB
 
         if self == DB.Chroma:
             from .chroma.chroma import ChromaClient
+
             return ChromaClient
 
         if self == DB.AWSOpenSearch:
             from .aws_opensearch.aws_opensearch import AWSOpenSearch
+
             return AWSOpenSearch
-
+
         if self == DB.AlloyDB:
             from .alloydb.alloydb import AlloyDB
+
             return AlloyDB
 
+        if self == DB.AliyunElasticsearch:
+            from .aliyun_elasticsearch.aliyun_elasticsearch import AliyunElasticsearch
+
+            return AliyunElasticsearch
+
+        if self == DB.AliyunOpenSearch:
+            from .aliyun_opensearch.aliyun_opensearch import AliyunOpenSearch
+
+            return AliyunOpenSearch
+
+        msg = f"Unknown DB: {self.name}"
+        raise ValueError(msg)
+
     @property
-    def config_cls(self) -> Type[DBConfig]:
+    def config_cls(self) -> type[DBConfig]:  # noqa: PLR0911, PLR0912
         """Import while in use"""
         if self == DB.Milvus:
             from .milvus.config import MilvusConfig
+
             return MilvusConfig
 
         if self == DB.ZillizCloud:
             from .zilliz_cloud.config import ZillizCloudConfig
+
             return ZillizCloudConfig
 
         if self == DB.Pinecone:
             from .pinecone.config import PineconeConfig
+
             return PineconeConfig
 
         if self == DB.ElasticCloud:
             from .elastic_cloud.config import ElasticCloudConfig
+
             return ElasticCloudConfig
 
         if self == DB.QdrantCloud:
             from .qdrant_cloud.config import QdrantConfig
+
             return QdrantConfig
 
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.config import WeaviateConfig
+
             return WeaviateConfig
 
         if self == DB.PgVector:
             from .pgvector.config import PgVectorConfig
+
             return PgVectorConfig
 
         if self == DB.PgVectoRS:
             from .pgvecto_rs.config import PgVectoRSConfig
+
             return PgVectoRSConfig
 
         if self == DB.PgVectorScale:
             from .pgvectorscale.config import PgVectorScaleConfig
+
             return PgVectorScaleConfig
 
         if self == DB.PgDiskANN:
             from .pgdiskann.config import PgDiskANNConfig
+
             return PgDiskANNConfig
 
         if self == DB.Redis:
             from .redis.config import RedisConfig
+
             return RedisConfig
-
+
         if self == DB.MemoryDB:
             from .memorydb.config import MemoryDBConfig
+
             return MemoryDBConfig
 
         if self == DB.Chroma:
             from .chroma.config import ChromaConfig
+
             return ChromaConfig
 
         if self == DB.AWSOpenSearch:
             from .aws_opensearch.config import AWSOpenSearchConfig
+
             return AWSOpenSearchConfig
-
+
         if self == DB.AlloyDB:
             from .alloydb.config import AlloyDBConfig
+
             return AlloyDBConfig
 
-    def case_config_cls(self, index_type: IndexType | None = None) -> Type[DBCaseConfig]:
+        if self == DB.AliyunElasticsearch:
+            from .aliyun_elasticsearch.config import AliyunElasticsearchConfig
+
+            return AliyunElasticsearchConfig
+
+        if self == DB.AliyunOpenSearch:
+            from .aliyun_opensearch.config import AliyunOpenSearchConfig
+
+            return AliyunOpenSearchConfig
+
+        msg = f"Unknown DB: {self.name}"
+        raise ValueError(msg)
+
+    def case_config_cls(  # noqa: PLR0911
+        self,
+        index_type: IndexType | None = None,
+    ) -> type[DBCaseConfig]:
         if self == DB.Milvus:
             from .milvus.config import _milvus_case_config
+
             return _milvus_case_config.get(index_type)
 
         if self == DB.ZillizCloud:
             from .zilliz_cloud.config import AutoIndexConfig
+
             return AutoIndexConfig
 
         if self == DB.ElasticCloud:
             from .elastic_cloud.config import ElasticCloudIndexConfig
+
             return ElasticCloudIndexConfig
 
         if self == DB.QdrantCloud:
             from .qdrant_cloud.config import QdrantIndexConfig
+
             return QdrantIndexConfig
 
         if self == DB.WeaviateCloud:
             from .weaviate_cloud.config import WeaviateIndexConfig
+
             return WeaviateIndexConfig
 
         if self == DB.PgVector:
             from .pgvector.config import _pgvector_case_config
+
             return _pgvector_case_config.get(index_type)
 
         if self == DB.PgVectoRS:
             from .pgvecto_rs.config import _pgvecto_rs_case_config
+
             return _pgvecto_rs_case_config.get(index_type)
 
         if self == DB.AWSOpenSearch:
             from .aws_opensearch.config import AWSOpenSearchIndexConfig
+
             return AWSOpenSearchIndexConfig
 
         if self == DB.PgVectorScale:
             from .pgvectorscale.config import _pgvectorscale_case_config
+
             return _pgvectorscale_case_config.get(index_type)
 
         if self == DB.PgDiskANN:
             from .pgdiskann.config import _pgdiskann_case_config
+
             return _pgdiskann_case_config.get(index_type)
-
+
         if self == DB.AlloyDB:
             from .alloydb.config import _alloydb_case_config
+
             return _alloydb_case_config.get(index_type)
 
+        if self == DB.AliyunElasticsearch:
+            from .elastic_cloud.config import ElasticCloudIndexConfig
+
+            return ElasticCloudIndexConfig
+
+        if self == DB.AliyunOpenSearch:
+            from .aliyun_opensearch.config import AliyunOpenSearchIndexConfig
+
+            return AliyunOpenSearchIndexConfig
+
         # DB.Pinecone, DB.Chroma, DB.Redis
         return EmptyDBCaseConfig
 
 
 __all__ = [
-    "DB", "VectorDB", "DBConfig", "DBCaseConfig", "IndexType", "MetricType", "EmptyDBCaseConfig",
+    "DB",
+    "DBCaseConfig",
+    "DBConfig",
+    "EmptyDBCaseConfig",
+    "IndexType",
+    "MetricType",
+    "VectorDB",
 ]
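Both factory properties now end with an explicit `raise ValueError` for unmapped members instead of implicitly returning `None`. A hedged sketch of how a caller resolves the new Aliyun backends through this enum (attribute names exactly as defined above):

    from vectordb_bench.backend.clients import DB

    db = DB.AliyunOpenSearch
    client_cls = db.init_cls             # lazily imports AliyunOpenSearch
    config_cls = db.config_cls           # AliyunOpenSearchConfig
    case_cfg_cls = db.case_config_cls()  # AliyunOpenSearchIndexConfig; index_type is optional

Note that `DB.AliyunElasticsearch` deliberately reuses `ElasticCloudIndexConfig` as its case config, since the client itself subclasses `ElasticCloud` (see the new file below).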
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py (new file)
@@ -0,0 +1,26 @@
+from ..elastic_cloud.config import ElasticCloudIndexConfig
+from ..elastic_cloud.elastic_cloud import ElasticCloud
+
+
+class AliyunElasticsearch(ElasticCloud):
+    def __init__(
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: ElasticCloudIndexConfig,
+        indice: str = "vdb_bench_indice",  # must be lowercase
+        id_col_name: str = "id",
+        vector_col_name: str = "vector",
+        drop_old: bool = False,
+        **kwargs,
+    ):
+        super().__init__(
+            dim=dim,
+            db_config=db_config,
+            db_case_config=db_case_config,
+            indice=indice,
+            id_col_name=id_col_name,
+            vector_col_name=vector_col_name,
+            drop_old=drop_old,
+            **kwargs,
+        )
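`AliyunElasticsearch` adds no behavior of its own; it exists so the Aliyun service gets its own `DB` enum entry and config class while delegating all index and query logic to `ElasticCloud`. A hypothetical instantiation (placeholder values; `case_cfg` stands for whatever `ElasticCloudIndexConfig` the case supplies):

    es = AliyunElasticsearch(
        dim=768,
        db_config=cfg.to_dict(),    # cfg: an AliyunElasticsearchConfig, defined below
        db_case_config=case_cfg,
        indice="vdb_bench_indice",  # must stay lowercase, per the comment above
        drop_old=True,
    )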
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py (new file)
@@ -0,0 +1,18 @@
+from pydantic import BaseModel, SecretStr
+
+from ..api import DBConfig
+
+
+class AliyunElasticsearchConfig(DBConfig, BaseModel):
+    #: Protocol in use to connect to the node
+    scheme: str = "http"
+    host: str = ""
+    port: int = 9200
+    user: str = "elastic"
+    password: SecretStr
+
+    def to_dict(self) -> dict:
+        return {
+            "hosts": [{"scheme": self.scheme, "host": self.host, "port": self.port}],
+            "basic_auth": (self.user, self.password.get_secret_value()),
+        }
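`to_dict()` yields the keyword arguments the Elasticsearch client constructor expects, and `SecretStr` keeps the password out of reprs and logs until `get_secret_value()` is called. A small sketch of the output (placeholder values; assumes `DBConfig` requires no additional fields):

    from pydantic import SecretStr

    cfg = AliyunElasticsearchConfig(host="10.0.0.1", password=SecretStr("placeholder"))
    cfg.to_dict()
    # {"hosts": [{"scheme": "http", "host": "10.0.0.1", "port": 9200}],
    #  "basic_auth": ("elastic", "placeholder")}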
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py (new file)
@@ -0,0 +1,345 @@
+import json
+import logging
+import time
+from contextlib import contextmanager
+
+from alibabacloud_ha3engine_vector import client, models
+from alibabacloud_ha3engine_vector.models import QueryRequest
+from alibabacloud_searchengine20211025 import models as searchengine_models
+from alibabacloud_searchengine20211025.client import Client as searchengineClient
+from alibabacloud_tea_openapi import models as open_api_models
+
+from ..api import MetricType, VectorDB
+from .config import AliyunOpenSearchIndexConfig
+
+log = logging.getLogger(__name__)
+
+ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH = 2 * 1024 * 1024  # 2MB
+ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH = 100
+
+
+class AliyunOpenSearch(VectorDB):
+    def __init__(
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: AliyunOpenSearchIndexConfig,
+        collection_name: str = "VectorDBBenchCollection",
+        drop_old: bool = False,
+        **kwargs,
+    ):
+        self.control_client = None
+        self.dim = dim
+        self.db_config = db_config
+        self.case_config = db_case_config
+        self.collection_name = collection_name
+        self.instance_id = db_config["host"].split(".")[0].replace("http://", "").replace("https://", "")
+
+        self._primary_field = "id"
+        self._scalar_field = "int_id"
+        self._vector_field = "vector"
+        self._index_name = "vector_idx"
+
+        self.batch_size = int(
+            min(
+                ALIYUN_OPENSEARCH_MAX_SIZE_PER_BATCH / (dim * 25),
+                ALIYUN_OPENSEARCH_MAX_NUM_PER_BATCH,
+            ),
+        )
+
+        log.info(f"Aliyun_OpenSearch client config: {self.db_config}")
+        control_config = open_api_models.Config(
+            access_key_id=self.db_config["ak"],
+            access_key_secret=self.db_config["sk"],
+            endpoint=self.db_config["control_host"],
+        )
+        self.control_client = searchengineClient(control_config)
+
+        if drop_old:
+            log.info(f"aliyun_OpenSearch client drop old index: {self.collection_name}")
+            if self._index_exists(self.control_client):
+                self._modify_index(self.control_client)
+            else:
+                self._create_index(self.control_client)
+
+    def _create_index(self, client: searchengineClient):
+        create_table_request = searchengine_models.CreateTableRequest()
+        create_table_request.name = self.collection_name
+        create_table_request.primary_key = self._primary_field
+        create_table_request.partition_count = 1
+        create_table_request.field_schema = {
+            self._primary_field: "INT64",
+            self._vector_field: "MULTI_FLOAT",
+            self._scalar_field: "INT64",
+        }
+        vector_index = searchengine_models.ModifyTableRequestVectorIndex()
+        vector_index.index_name = self._index_name
+        vector_index.dimension = self.dim
+        vector_index.distance_type = self.case_config.distance_type()
+        vector_index.vector_field = self._vector_field
+        vector_index.vector_index_type = "HNSW"
+
+        advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
+        vector_index.advance_params = advance_params
+        create_table_request.vector_index = [vector_index]
+
+        try:
+            response = client.create_table(self.instance_id, create_table_request)
+            log.info(f"create table success: {response.body}")
+        except Exception as error:
+            log.info(error.message)
+            log.info(error.data.get("Recommend"))
+            log.info(f"Failed to create index: error: {error!s}")
+            raise error from None
+
+        # check if index create success
+        self._active_index(client)
+
+    # check if index create success
+    def _active_index(self, client: searchengineClient) -> None:
+        retry_times = 0
+        while True:
+            time.sleep(10)
+            log.info(f"begin to {retry_times} times get table")
+            retry_times += 1
+            response = client.get_table(self.instance_id, self.collection_name)
+            if response.body.result.status == "IN_USE":
+                log.info(f"{self.collection_name} table begin to use.")
+                return
+
+    def _index_exists(self, client: searchengineClient) -> bool:
+        try:
+            client.get_table(self.instance_id, self.collection_name)
+        except Exception as err:
+            log.warning(f"get table from searchengine error, err={err}")
+            return False
+        else:
+            return True
+
+    # check that the index build succeeded; embeddings are inserted only after the build succeeds
+    def _index_build_success(self, client: searchengineClient) -> None:
+        log.info("begin to check if table build success.")
+        time.sleep(50)
+
+        retry_times = 0
+        while True:
+            time.sleep(10)
+            log.info(f"begin to {retry_times} times get table fsm")
+            retry_times += 1
+            request = searchengine_models.ListTasksRequest()
+            request.start = (int(time.time()) - 3600) * 1000
+            request.end = int(time.time()) * 1000
+            response = client.list_tasks(self.instance_id, request)
+            fsms = response.body.result
+            cur_fsm = None
+            for fsm in fsms:
+                if fsm["type"] != "datasource_flow_fsm":
+                    continue
+                if self.collection_name not in fsm["fsmId"]:
+                    continue
+                cur_fsm = fsm
+                break
+            if cur_fsm is None:
+                log.warning("no build index fsm")
+                return
+            if cur_fsm["status"] == "success":
+                return
+
+    def _modify_index(self, client: searchengineClient) -> None:
+        # check if index create success
+        self._active_index(client)
+
+        modify_table_request = searchengine_models.ModifyTableRequest()
+        modify_table_request.partition_count = 1
+        modify_table_request.primary_key = self._primary_field
+        modify_table_request.field_schema = {
+            self._primary_field: "INT64",
+            self._vector_field: "MULTI_FLOAT",
+            self._scalar_field: "INT64",
+        }
+        vector_index = searchengine_models.ModifyTableRequestVectorIndex()
+        vector_index.index_name = self._index_name
+        vector_index.dimension = self.dim
+        vector_index.distance_type = self.case_config.distance_type()
+        vector_index.vector_field = self._vector_field
+        vector_index.vector_index_type = "HNSW"
+        advance_params = searchengine_models.ModifyTableRequestVectorIndexAdvanceParams()
+
+        str_max_neighbor_count = f'"proxima.hnsw.builder.max_neighbor_count":{self.case_config.M}'
+        str_efc = f'"proxima.hnsw.builder.efconstruction":{self.case_config.ef_construction}'
+        str_enable_adsampling = '"proxima.hnsw.builder.enable_adsampling":true'
+        str_slack_pruning_factor = '"proxima.hnsw.builder.slack_pruning_factor":1.1'
+        str_thread_count = '"proxima.hnsw.builder.thread_count":16'
+
+        params = ",".join(
+            [
+                str_max_neighbor_count,
+                str_efc,
+                str_enable_adsampling,
+                str_slack_pruning_factor,
+                str_thread_count,
+            ],
+        )
+        advance_params.build_index_params = params
+        advance_params.search_index_params = (
+            '{"proxima.hnsw.searcher.ef":400,"proxima.hnsw.searcher.dynamic_termination.prob_threshold":0.7}'
+        )
+        vector_index.advance_params = advance_params
+
+        modify_table_request.vector_index = [vector_index]
+
+        try:
+            response = client.modify_table(
+                self.instance_id,
+                self.collection_name,
+                modify_table_request,
+            )
+            log.info(f"modify table success: {response.body}")
+        except Exception as error:
+            log.info(error.message)
+            log.info(error.data.get("Recommend"))
+            log.info(f"Failed to modify index: error: {error!s}")
+            raise error from None
+
+        # check if modify index & delete data fsm success
+        self._index_build_success(client)
+
+    # get collection records total count
+    def _get_total_count(self):
+        try:
+            response = self.client.stats(self.collection_name)
+        except Exception as e:
+            log.warning(f"Error querying index: {e}")
+        else:
+            body = json.loads(response.body)
+            log.info(f"stats info: {response.body}")
+
+            if "result" in body and "totalDocCount" in body.get("result"):
+                return body.get("result").get("totalDocCount")
+            return 0
+
+    @contextmanager
+    def init(self) -> None:
+        """connect to aliyun opensearch"""
+        config = models.Config(
+            endpoint=self.db_config["host"],
+            protocol="http",
+            access_user_name=self.db_config["user"],
+            access_pass_word=self.db_config["password"],
+        )
+
+        self.client = client.Client(config)
+
+        yield
+        self.client = None
+        del self.client
+
+    def insert_embeddings(
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs,
+    ) -> tuple[int, Exception]:
+        """Insert the embeddings to the opensearch."""
+        assert self.client is not None, "should self.init() first"
+        assert len(embeddings) == len(metadata)
+        insert_count = 0
+
+        try:
+            for batch_start_offset in range(0, len(embeddings), self.batch_size):
+                batch_end_offset = min(batch_start_offset + self.batch_size, len(embeddings))
+                documents = []
+                for i in range(batch_start_offset, batch_end_offset):
+                    document_fields = {
+                        self._primary_field: metadata[i],
+                        self._vector_field: embeddings[i],
+                        self._scalar_field: metadata[i],
+                        "ops_build_channel": "inc",
+                    }
+                    document = {"fields": document_fields, "cmd": "add"}
+                    documents.append(document)
+
+                push_doc_req = models.PushDocumentsRequest({}, documents)
+                self.client.push_documents(
+                    self.collection_name,
+                    self._primary_field,
+                    push_doc_req,
+                )
+                insert_count += batch_end_offset - batch_start_offset
+        except Exception as e:
+            log.info(f"Failed to insert data: {e}")
+            return (insert_count, e)
+        return (insert_count, None)
+
+    def search_embedding(
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict | None = None,
+    ) -> list[int]:
+        assert self.client is not None, "should self.init() first"
+        search_params = '{"proxima.hnsw.searcher.ef":' + str(self.case_config.ef_search) + "}"
+
+        os_filter = f"{self._scalar_field} {filters.get('metadata')}" if filters else ""
+
+        try:
+            request = QueryRequest(
+                table_name=self.collection_name,
+                vector=query,
+                top_k=k,
+                search_params=search_params,
+                filter=os_filter,
+            )
+            result = self.client.query(request)
+        except Exception as e:
+            log.info(f"Error querying index: {e}")
+            raise e from e
+        else:
+            res = json.loads(result.body)
+            return [one_res["id"] for one_res in res["result"]]
+
+    def need_normalize_cosine(self) -> bool:
+        """Whether this database needs to normalize the dataset to support COSINE"""
+        if self.case_config.metric_type == MetricType.COSINE:
+            log.info("cosine dataset need normalize.")
+            return True
+
+        return False
+
+    def optimize(self):
+        pass
+
+    def optimize_with_size(self, data_size: int):
+        log.info(f"optimize count: {data_size}")
+        retry_times = 0
+        while True:
+            time.sleep(10)
+            log.info(f"begin to {retry_times} times get optimize table")
+            retry_times += 1
+            total_count = self._get_total_count()
+            # check if the data is inserted
+            if total_count == data_size:
+                log.info("optimize table finish.")
+                return
+
+    def ready_to_load(self):
+        """ready_to_load will be called before load in load cases."""