vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/clients/api.py

@@ -1,9 +1,8 @@
 from abc import ABC, abstractmethod
-from enum import Enum
-from typing import Any, Type
 from contextlib import contextmanager
+from enum import Enum
 
-from pydantic import BaseModel,
+from pydantic import BaseModel, SecretStr, validator
 
 
 class MetricType(str, Enum):
@@ -65,13 +64,10 @@ class DBConfig(ABC, BaseModel):
         raise NotImplementedError
 
     @validator("*")
-    def not_empty_field(cls, v, field):
-        if (
-            field.name in cls.common_short_configs()
-            or field.name in cls.common_long_configs()
-        ):
+    def not_empty_field(cls, v: any, field: any):
+        if field.name in cls.common_short_configs() or field.name in cls.common_long_configs():
             return v
-        if not v and isinstance(v,
+        if not v and isinstance(v, str | SecretStr):
             raise ValueError("Empty string!")
         return v
 
@@ -141,6 +137,13 @@ class VectorDB(ABC):
     @contextmanager
     def init(self) -> None:
         """create and destory connections to database.
+        Why contextmanager:
+
+            In multiprocessing search tasks, vectordbbench might init
+            totally hundreds of thousands of connections with DB server.
+
+            Too many connections may drain local FDs or server connection resources.
+            If the DB client doesn't have `close()` method, just set the object to None.
 
         Examples:
             >>> with self.init():
@@ -191,9 +194,8 @@ class VectorDB(ABC):
         """
         raise NotImplementedError
 
-    # TODO: remove
     @abstractmethod
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases.
 
         Should be blocked until the vectorDB is ready to be tested on
@@ -203,16 +205,3 @@ class VectorDB(ABC):
         Optimize's execution time is limited, the limited time is based on cases.
         """
         raise NotImplementedError
-
-    def optimize_with_size(self, data_size: int):
-        self.optimize()
-
-    # TODO: remove
-    @abstractmethod
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases.
-
-        Should be blocked until the vectorDB is ready to be tested on
-        heavy load cases.
-        """
-        raise NotImplementedError
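
The net effect of the api.py hunks above: `optimize_with_size` and `ready_to_load` are removed, and clients implement a single abstract `optimize(data_size)` plus a contextmanager `init()` that opens one connection per process and drops it on exit, for the reasons the new docstring gives. A minimal sketch of a client against this 0.0.21 interface — `FakeDriver` and `MyClient` are hypothetical stand-ins, not code from the package:

    from contextlib import contextmanager


    class FakeDriver:
        """Placeholder for a real database driver."""

        def close(self) -> None: ...


    class MyClient:  # would subclass vectordb_bench.backend.clients.api.VectorDB
        def __init__(self, db_config: dict, **kwargs):
            self.db_config = db_config
            self.client = None

        @contextmanager
        def init(self):
            # One short-lived connection per process: multiprocessing search
            # runners may otherwise exhaust local FDs or server connection slots.
            self.client = FakeDriver()
            try:
                yield
            finally:
                self.client.close()  # or just set to None if the driver has no close()
                self.client = None

        def optimize(self, data_size: int | None = None):
            # 0.0.21 passes the dataset size here; 0.0.19's separate
            # optimize_with_size(data_size) wrapper is gone.
            ...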
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py

@@ -1,14 +1,18 @@
 import logging
-from contextlib import contextmanager
 import time
-from
-from
-
+from collections.abc import Iterable
+from contextlib import contextmanager
+
 from opensearchpy import OpenSearch
-
+
+from ..api import IndexType, VectorDB
+from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig, AWSOS_Engine
 
 log = logging.getLogger(__name__)
 
+WAITING_FOR_REFRESH_SEC = 30
+WAITING_FOR_FORCE_MERGE_SEC = 30
+
 
 class AWSOpenSearch(VectorDB):
     def __init__(
@@ -17,7 +21,7 @@ class AWSOpenSearch(VectorDB):
         db_config: dict,
         db_case_config: AWSOpenSearchIndexConfig,
         index_name: str = "vdb_bench_index",  # must be lowercase
-        id_col_name: str = "
+        id_col_name: str = "_id",
         vector_col_name: str = "embedding",
         drop_old: bool = False,
         **kwargs,
@@ -27,9 +31,7 @@ class AWSOpenSearch(VectorDB):
         self.case_config = db_case_config
         self.index_name = index_name
         self.id_col_name = id_col_name
-        self.category_col_names = [
-            f"scalar-{categoryCount}" for categoryCount in [2, 5, 10, 100, 1000]
-        ]
+        self.category_col_names = [f"scalar-{categoryCount}" for categoryCount in [2, 5, 10, 100, 1000]]
         self.vector_col_name = vector_col_name
 
         log.info(f"AWS_OpenSearch client config: {self.db_config}")
@@ -46,39 +48,32 @@ class AWSOpenSearch(VectorDB):
         return AWSOpenSearchConfig
 
     @classmethod
-    def case_config_cls(
-        cls, index_type: IndexType | None = None
-    ) -> AWSOpenSearchIndexConfig:
+    def case_config_cls(cls, index_type: IndexType | None = None) -> AWSOpenSearchIndexConfig:
         return AWSOpenSearchIndexConfig
 
     def _create_index(self, client: OpenSearch):
         settings = {
             "index": {
                 "knn": True,
-
-                # "refresh_interval": "600s",
-            }
+            },
         }
         mappings = {
             "properties": {
-
-                **{
-                    categoryCol: {"type": "keyword"}
-                    for categoryCol in self.category_col_names
-                },
+                **{categoryCol: {"type": "keyword"} for categoryCol in self.category_col_names},
                 self.vector_col_name: {
                     "type": "knn_vector",
                     "dimension": self.dim,
                     "method": self.case_config.index_param(),
                 },
-            }
+            },
         }
         try:
             client.indices.create(
-                index=self.index_name,
+                index=self.index_name,
+                body={"settings": settings, "mappings": mappings},
             )
         except Exception as e:
-            log.warning(f"Failed to create index: {self.index_name} error: {
+            log.warning(f"Failed to create index: {self.index_name} error: {e!s}")
             raise e from None
 
     @contextmanager
@@ -87,7 +82,6 @@ class AWSOpenSearch(VectorDB):
         self.client = OpenSearch(**self.db_config)
 
         yield
-        # self.client.transport.close()
         self.client = None
         del self.client
 
@@ -102,16 +96,20 @@ class AWSOpenSearch(VectorDB):
 
         insert_data = []
         for i in range(len(embeddings)):
-            insert_data.append(
+            insert_data.append(
+                {"index": {"_index": self.index_name, self.id_col_name: metadata[i]}},
+            )
             insert_data.append({self.vector_col_name: embeddings[i]})
         try:
             resp = self.client.bulk(insert_data)
             log.info(f"AWS_OpenSearch adding documents: {len(resp['items'])}")
             resp = self.client.indices.stats(self.index_name)
-            log.info(
+            log.info(
+                f"Total document count in index: {resp['_all']['primaries']['indexing']['index_total']}",
+            )
             return (len(embeddings), None)
         except Exception as e:
-            log.warning(f"Failed to insert data: {self.index_name} error: {
+            log.warning(f"Failed to insert data: {self.index_name} error: {e!s}")
             time.sleep(10)
             return self.insert_embeddings(embeddings, metadata)
 
@@ -136,23 +134,26 @@ class AWSOpenSearch(VectorDB):
         body = {
             "size": k,
             "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
-            **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {})
+            **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {}),
         }
         try:
-            resp = self.client.search(
-
-
-
-
-
-
-
+            resp = self.client.search(
+                index=self.index_name,
+                body=body,
+                size=k,
+                _source=False,
+                docvalue_fields=[self.id_col_name],
+                stored_fields="_none_",
+            )
+            log.info(f"Search took: {resp['took']}")
+            log.info(f"Search shards: {resp['_shards']}")
+            log.info(f"Search hits total: {resp['hits']['total']}")
+            return [int(h["fields"][self.id_col_name][0]) for h in resp["hits"]["hits"]]
         except Exception as e:
-            log.warning(f"Failed to search: {self.index_name} error: {
+            log.warning(f"Failed to search: {self.index_name} error: {e!s}")
             raise e from None
 
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         """optimize will be called between insertion and search in performance cases."""
         # Call refresh first to ensure that all segments are created
         self._refresh_index()
@@ -164,37 +165,32 @@ class AWSOpenSearch(VectorDB):
 
     def _refresh_index(self):
         log.debug(f"Starting refresh for index {self.index_name}")
-        SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
         while True:
             try:
-                log.info(
+                log.info("Starting the Refresh Index..")
                 self.client.indices.refresh(index=self.index_name)
                 break
             except Exception as e:
                 log.info(
-                    f"Refresh errored out. Sleeping for {
-
+                    f"Refresh errored out. Sleeping for {WAITING_FOR_REFRESH_SEC} sec and then Retrying : {e}",
+                )
+                time.sleep(WAITING_FOR_REFRESH_SEC)
                 continue
         log.debug(f"Completed refresh for index {self.index_name}")
 
     def _do_force_merge(self):
         log.debug(f"Starting force merge for index {self.index_name}")
-        force_merge_endpoint = f
-        force_merge_task_id = self.client.transport.perform_request(
-        SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
+        force_merge_endpoint = f"/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
+        force_merge_task_id = self.client.transport.perform_request("POST", force_merge_endpoint)["task"]
         while True:
-            time.sleep(
+            time.sleep(WAITING_FOR_FORCE_MERGE_SEC)
             task_status = self.client.tasks.get(task_id=force_merge_task_id)
-            if task_status[
+            if task_status["completed"]:
                 break
         log.debug(f"Completed force merge for index {self.index_name}")
 
     def _load_graphs_to_memory(self):
         if self.case_config.engine != AWSOS_Engine.lucene:
             log.info("Calling warmup API to load graphs into memory")
-            warmup_endpoint = f
-            self.client.transport.perform_request(
-
-    def ready_to_load(self):
-        """ready_to_load will be called before load in load cases."""
-        pass
+            warmup_endpoint = f"/_plugins/_knn/warmup/{self.index_name}"
+            self.client.transport.perform_request("GET", warmup_endpoint)
vectordb_bench/backend/clients/aws_opensearch/cli.py

@@ -14,22 +14,20 @@ from .. import DB
 
 
 class AWSOpenSearchTypedDict(TypedDict):
-    host: Annotated[
-        str, click.option("--host", type=str, help="Db host", required=True)
-    ]
+    host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
     port: Annotated[int, click.option("--port", type=int, default=443, help="Db Port")]
     user: Annotated[str, click.option("--user", type=str, default="admin", help="Db User")]
     password: Annotated[str, click.option("--password", type=str, help="Db password")]
 
 
-class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2):
-    ...
+class AWSOpenSearchHNSWTypedDict(CommonTypedDict, AWSOpenSearchTypedDict, HNSWFlavor2): ...
 
 
 @cli.command()
 @click_parameter_decorators_from_typed_dict(AWSOpenSearchHNSWTypedDict)
 def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
     from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
+
     run(
         db=DB.AWSOpenSearch,
         db_config=AWSOpenSearchConfig(
@@ -38,7 +36,6 @@ def AWSOpenSearch(**parameters: Unpack[AWSOpenSearchHNSWTypedDict]):
             user=parameters["user"],
             password=SecretStr(parameters["password"]),
         ),
-        db_case_config=AWSOpenSearchIndexConfig(
-        ),
+        db_case_config=AWSOpenSearchIndexConfig(),
         **parameters,
     )
vectordb_bench/backend/clients/aws_opensearch/config.py

@@ -1,10 +1,13 @@
 import logging
 from enum import Enum
-from pydantic import SecretStr, BaseModel
 
-from
+from pydantic import BaseModel, SecretStr
+
+from ..api import DBCaseConfig, DBConfig, MetricType
 
 log = logging.getLogger(__name__)
+
+
 class AWSOpenSearchConfig(DBConfig, BaseModel):
     host: str = ""
     port: int = 443
@@ -13,7 +16,7 @@ class AWSOpenSearchConfig(DBConfig, BaseModel):
 
     def to_dict(self) -> dict:
         return {
-            "hosts": [{
+            "hosts": [{"host": self.host, "port": self.port}],
             "http_auth": (self.user, self.password.get_secret_value()),
             "use_ssl": True,
             "http_compress": True,
@@ -40,25 +43,26 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     def parse_metric(self) -> str:
         if self.metric_type == MetricType.IP:
             return "innerproduct"
-
+        if self.metric_type == MetricType.COSINE:
             if self.engine == AWSOS_Engine.faiss:
-                log.info(
+                log.info(
+                    "Using innerproduct because faiss doesn't support cosine as metric type for Opensearch",
+                )
                 return "innerproduct"
             return "cosinesimil"
         return "l2"
 
     def index_param(self) -> dict:
-
+        return {
             "name": "hnsw",
             "space_type": self.parse_metric(),
             "engine": self.engine.value,
             "parameters": {
                 "ef_construction": self.efConstruction,
                 "m": self.M,
-                "ef_search": self.efSearch
-            }
+                "ef_search": self.efSearch,
+            },
         }
-        return params
 
     def search_param(self) -> dict:
         return {}
vectordb_bench/backend/clients/aws_opensearch/run.py

@@ -1,12 +1,16 @@
-import
+import logging
+import random
+import time
+
 from opensearchpy import OpenSearch
-from opensearch_dsl import Search, Document, Text, Keyword
 
-
+log = logging.getLogger(__name__)
+
+_HOST = "xxxxxx.us-west-2.es.amazonaws.com"
 _PORT = 443
-_AUTH = (
+_AUTH = ("admin", "xxxxxx")  # For testing only. Don't store credentials in code.
 
-_INDEX_NAME =
+_INDEX_NAME = "my-dsl-index"
 _BATCH = 100
 _ROWS = 100
 _DIM = 128
@@ -14,25 +18,24 @@ _TOPK = 10
 
 
 def create_client():
-
-        hosts=[{
-        http_compress=True,
+    return OpenSearch(
+        hosts=[{"host": _HOST, "port": _PORT}],
+        http_compress=True,  # enables gzip compression for request bodies
         http_auth=_AUTH,
         use_ssl=True,
         verify_certs=True,
         ssl_assert_hostname=False,
         ssl_show_warn=False,
     )
-    return client
 
 
-def create_index(client, index_name):
+def create_index(client: OpenSearch, index_name: str):
     settings = {
         "index": {
             "knn": True,
             "number_of_shards": 1,
             "refresh_interval": "5s",
-        }
+        },
     }
     mappings = {
         "properties": {
@@ -46,41 +49,46 @@ def create_index(client, index_name):
             "parameters": {
                 "ef_construction": 256,
                 "m": 16,
-            }
-        }
-    }
-}
+                },
+            },
+        },
+    },
     }
 
-    response = client.indices.create(
-
-
+    response = client.indices.create(
+        index=index_name,
+        body={"settings": settings, "mappings": mappings},
+    )
+    log.info("\nCreating index:")
+    log.info(response)
 
 
-def delete_index(client, index_name):
+def delete_index(client: OpenSearch, index_name: str):
     response = client.indices.delete(index=index_name)
-
-
+    log.info("\nDeleting index:")
+    log.info(response)
 
 
-def bulk_insert(client, index_name):
+def bulk_insert(client: OpenSearch, index_name: str):
     # Perform bulk operations
-    ids =
+    ids = list(range(_ROWS))
     vec = [[random.random() for _ in range(_DIM)] for _ in range(_ROWS)]
 
     docs = []
     for i in range(0, _ROWS, _BATCH):
         docs.clear()
-        for j in range(
-            docs.append({"index": {"_index": index_name, "_id": ids[i+j]}})
-            docs.append({"embedding": vec[i+j]})
+        for j in range(_BATCH):
+            docs.append({"index": {"_index": index_name, "_id": ids[i + j]}})
+            docs.append({"embedding": vec[i + j]})
         response = client.bulk(docs)
-
+        log.info(f"Adding documents: {len(response['items'])}, {response['errors']}")
        response = client.indices.stats(index_name)
-
+        log.info(
+            f'Total document count in index: { response["_all"]["primaries"]["indexing"]["index_total"] }',
+        )
 
 
-def search(client, index_name):
+def search(client: OpenSearch, index_name: str):
     # Search for the document.
     search_body = {
         "size": _TOPK,
@@ -89,53 +97,55 @@ def search(client, index_name):
             "embedding": {
                 "vector": [random.random() for _ in range(_DIM)],
                 "k": _TOPK,
-            }
-        }
-    }
+                },
+            },
+        },
     }
     while True:
         response = client.search(index=index_name, body=search_body)
-
-
-
+        log.info(f'\nSearch took: {response["took"]}')
+        log.info(f'\nSearch shards: {response["_shards"]}')
+        log.info(f'\nSearch hits total: {response["hits"]["total"]}')
         result = response["hits"]["hits"]
         if len(result) != 0:
-
+            log.info("\nSearch results:")
             for hit in response["hits"]["hits"]:
-
+                log.info(hit["_id"], hit["_score"])
             break
-
-
-
-
-
-
-
-
-
+        log.info("\nSearch not ready, sleep 1s")
+        time.sleep(1)
+
+
+SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
+WAITINT_FOR_REFRESH_SEC = 30
+
+
+def optimize_index(client: OpenSearch, index_name: str):
+    log.info(f"Starting force merge for index {index_name}")
+    force_merge_endpoint = f"/{index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false"
+    force_merge_task_id = client.transport.perform_request("POST", force_merge_endpoint)["task"]
     while True:
         time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
         task_status = client.tasks.get(task_id=force_merge_task_id)
-        if task_status[
+        if task_status["completed"]:
             break
-
+    log.info(f"Completed force merge for index {index_name}")
 
 
-def refresh_index(client, index_name):
-
-    SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
+def refresh_index(client: OpenSearch, index_name: str):
+    log.info(f"Starting refresh for index {index_name}")
     while True:
         try:
-
+            log.info("Starting the Refresh Index..")
             client.indices.refresh(index=index_name)
             break
         except Exception as e:
-
-            f"Refresh errored out. Sleeping for {
-
+            log.info(
+                f"Refresh errored out. Sleeping for {WAITINT_FOR_REFRESH_SEC} sec and then Retrying : {e}",
+            )
+            time.sleep(WAITINT_FOR_REFRESH_SEC)
             continue
-
-
+    log.info(f"Completed refresh for index {index_name}")
 
 
 def main():
@@ -148,9 +158,9 @@ def main():
         search(client, _INDEX_NAME)
         delete_index(client, _INDEX_NAME)
     except Exception as e:
-
+        log.info(e)
         delete_index(client, _INDEX_NAME)
 
 
-if __name__ ==
+if __name__ == "__main__":
     main()