PyPI - vectordb-bench - Versions diffs - 0.0.12__tar.gz → 0.0.14__tar.gz - Mend

vectordb-bench 0.0.12 (tar.gz) → 0.0.14 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/.gitignore RENAMED Viewed

@@ -9,3 +9,4 @@ __MACOSX
 build/
 venv/
 .idea/
+results/

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vectordb-bench
-Version: 0.0.12
+Version: 0.0.14
 Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
 Author-email: XuanYang-cn <xuan.yang@zilliz.com>
 Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -36,10 +36,10 @@ Requires-Dist: pinecone-client; extra == "all"
 Requires-Dist: weaviate-client; extra == "all"
 Requires-Dist: elasticsearch; extra == "all"
 Requires-Dist: pgvector; extra == "all"
+Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "all"
 Requires-Dist: sqlalchemy; extra == "all"
 Requires-Dist: redis; extra == "all"
 Requires-Dist: chromadb; extra == "all"
-Requires-Dist: psycopg2; extra == "all"
 Requires-Dist: psycopg; extra == "all"
 Requires-Dist: psycopg-binary; extra == "all"
 Requires-Dist: opensearch-dsl==2.1.0; extra == "all"
@@ -56,10 +56,16 @@ Provides-Extra: pgvector
 Requires-Dist: psycopg; extra == "pgvector"
 Requires-Dist: psycopg-binary; extra == "pgvector"
 Requires-Dist: pgvector; extra == "pgvector"
+Provides-Extra: pgvectorscale
+Requires-Dist: psycopg; extra == "pgvectorscale"
+Requires-Dist: psycopg-binary; extra == "pgvectorscale"
+Requires-Dist: pgvector; extra == "pgvectorscale"
 Provides-Extra: pgvecto-rs
-Requires-Dist: psycopg2; extra == "pgvecto-rs"
+Requires-Dist: pgvecto_rs[psycopg3]>=0.2.2; extra == "pgvecto-rs"
 Provides-Extra: redis
 Requires-Dist: redis; extra == "redis"
+Provides-Extra: memorydb
+Requires-Dist: memorydb; extra == "memorydb"
 Provides-Extra: chromadb
 Requires-Dist: chromadb; extra == "chromadb"
 Provides-Extra: awsopensearch
@@ -105,7 +111,9 @@ All the database client supported
 | elastic                  | `pip install vectordb-bench[elastic]`       |
 | pgvector                 | `pip install vectordb-bench[pgvector]`      |
 | pgvecto.rs               | `pip install vectordb-bench[pgvecto_rs]`    |
+| pgvectorscale            | `pip install vectordb-bench[pgvectorscale]` |
 | redis                    | `pip install vectordb-bench[redis]`         |
+| memorydb                 | `pip install vectordb-bench[memorydb]`      |
 | chromadb                 | `pip install vectordb-bench[chromadb]`      |
 | awsopensearch            | `pip install vectordb-bench[awsopensearch]` |
@@ -183,6 +191,29 @@ Options:
   --m INTEGER                     hnsw m
   --ef-construction INTEGER       hnsw ef-construction
   --ef-search INTEGER             hnsw ef-search
+  --quantization-type [none|halfvec]
+                                  quantization type for vectors
+  --custom-case-name TEXT         Custom case name i.e. PerformanceCase1536D50K
+  --custom-case-description TEXT  Custom name description
+  --custom-case-load-timeout INTEGER
+                                  Custom case load timeout [default: 36000]
+  --custom-case-optimize-timeout INTEGER
+                                  Custom case optimize timeout [default: 36000]
+  --custom-dataset-name TEXT
+                                  Dataset name i.e OpenAI
+  --custom-dataset-dir TEXT       Dataset directory i.e. openai_medium_500k
+  --custom-dataset-size INTEGER   Dataset size i.e. 500000
+  --custom-dataset-dim INTEGER    Dataset dimension
+  --custom-dataset-metric-type TEXT
+                                  Dataset distance metric [default: COSINE]
+  --custom-dataset-file-count INTEGER
+                                  Dataset file count
+  --custom-dataset-use-shuffled / --skip-custom-dataset-use-shuffled
+                                  Use shuffled custom dataset or skip  [default: custom-dataset-
+                                  use-shuffled]
+  --custom-dataset-with-gt / --skip-custom-dataset-with-gt
+                                  Custom dataset with ground truth or skip  [default: custom-dataset-
+                                  with-gt]
   --help                          Show this message and exit.
 ```
 #### Using a configuration file.
@@ -527,6 +558,8 @@ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
    3. Update db_config and db_case_config to match client requirements
    4. Continue to add new functions for each index config.
    5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
+   6. Import the `get_custom_case_config` function from `vectordb_bench/cli/cli.py` and use it to add a new key `custom_case` to the `parameters` variable within the command.
 > cli modules with multiple index configs:
 > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/README.md RENAMED Viewed

@@ -37,7 +37,9 @@ All the database client supported
 | elastic                  | `pip install vectordb-bench[elastic]`       |
 | pgvector                 | `pip install vectordb-bench[pgvector]`      |
 | pgvecto.rs               | `pip install vectordb-bench[pgvecto_rs]`    |
+| pgvectorscale            | `pip install vectordb-bench[pgvectorscale]` |
 | redis                    | `pip install vectordb-bench[redis]`         |
+| memorydb                 | `pip install vectordb-bench[memorydb]`      |
 | chromadb                 | `pip install vectordb-bench[chromadb]`      |
 | awsopensearch            | `pip install vectordb-bench[awsopensearch]` |
@@ -115,6 +117,29 @@ Options:
   --m INTEGER                     hnsw m
   --ef-construction INTEGER       hnsw ef-construction
   --ef-search INTEGER             hnsw ef-search
+  --quantization-type [none|halfvec]
+                                  quantization type for vectors
+  --custom-case-name TEXT         Custom case name i.e. PerformanceCase1536D50K
+  --custom-case-description TEXT  Custom name description
+  --custom-case-load-timeout INTEGER
+                                  Custom case load timeout [default: 36000]
+  --custom-case-optimize-timeout INTEGER
+                                  Custom case optimize timeout [default: 36000]
+  --custom-dataset-name TEXT
+                                  Dataset name i.e OpenAI
+  --custom-dataset-dir TEXT       Dataset directory i.e. openai_medium_500k
+  --custom-dataset-size INTEGER   Dataset size i.e. 500000
+  --custom-dataset-dim INTEGER    Dataset dimension
+  --custom-dataset-metric-type TEXT
+                                  Dataset distance metric [default: COSINE]
+  --custom-dataset-file-count INTEGER
+                                  Dataset file count
+  --custom-dataset-use-shuffled / --skip-custom-dataset-use-shuffled
+                                  Use shuffled custom dataset or skip  [default: custom-dataset-
+                                  use-shuffled]
+  --custom-dataset-with-gt / --skip-custom-dataset-with-gt
+                                  Custom dataset with ground truth or skip  [default: custom-dataset-
+                                  with-gt]
   --help                          Show this message and exit.
 ```
 #### Using a configuration file.
@@ -459,6 +484,8 @@ def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
    3. Update db_config and db_case_config to match client requirements
    4. Continue to add new functions for each index config.
    5. Import the client cli module and command to vectordb_bench/cli/vectordbbench.py (for databases with multiple commands (index configs), this only needs to be done for one command)
+   6. Import the `get_custom_case_config` function from `vectordb_bench/cli/cli.py` and use it to add a new key `custom_case` to the `parameters` variable within the command.
 > cli modules with multiple index configs:
 > - pgvector: vectordb_bench/backend/clients/pgvector/cli.py

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/install/requirements_py3.11.txt RENAMED Viewed

@@ -5,6 +5,7 @@ pinecone-client
 weaviate-client
 elasticsearch
 pgvector
+pgvecto_rs[psycopg3]>=0.2.1
 sqlalchemy
 redis
 chromadb

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/pyproject.toml RENAMED Viewed

@@ -56,10 +56,10 @@ all = [
     "weaviate-client",
     "elasticsearch",
     "pgvector",
+    "pgvecto_rs[psycopg3]>=0.2.2",
     "sqlalchemy",
     "redis",
     "chromadb",
-    "psycopg2",
     "psycopg",
     "psycopg-binary",
     "opensearch-dsl==2.1.0",
@@ -71,8 +71,10 @@ pinecone = [ "pinecone-client" ]
 weaviate = [ "weaviate-client" ]
 elastic = [ "elasticsearch" ]
 pgvector = [ "psycopg", "psycopg-binary", "pgvector" ]
-pgvecto_rs = [ "psycopg2" ]
+pgvectorscale = [ "psycopg", "psycopg-binary", "pgvector" ]
+pgvecto_rs = [ "pgvecto_rs[psycopg3]>=0.2.2" ]
 redis = [ "redis" ]
+memorydb = [ "memorydb" ]
 chromadb = [ "chromadb" ]
 awsopensearch = [ "awsopensearch" ]
 zilliz_cloud = []

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/vectordb_bench/backend/clients/__init__.py RENAMED Viewed

@@ -30,7 +30,9 @@ class DB(Enum):
     WeaviateCloud = "WeaviateCloud"
     PgVector = "PgVector"
     PgVectoRS = "PgVectoRS"
+    PgVectorScale = "PgVectorScale"
     Redis = "Redis"
+    MemoryDB = "MemoryDB"
     Chroma = "Chroma"
     AWSOpenSearch = "OpenSearch"
     Test = "test"
@@ -70,10 +72,18 @@ class DB(Enum):
         if self == DB.PgVectoRS:
             from .pgvecto_rs.pgvecto_rs import PgVectoRS
             return PgVectoRS
+        if self == DB.PgVectorScale:
+            from .pgvectorscale.pgvectorscale import PgVectorScale
+            return PgVectorScale
         if self == DB.Redis:
             from .redis.redis import Redis
             return Redis
+        if self == DB.MemoryDB:
+            from .memorydb.memorydb import MemoryDB
+            return MemoryDB
         if self == DB.Chroma:
             from .chroma.chroma import ChromaClient
@@ -118,9 +128,17 @@ class DB(Enum):
             from .pgvecto_rs.config import PgVectoRSConfig
             return PgVectoRSConfig
+        if self == DB.PgVectorScale:
+            from .pgvectorscale.config import PgVectorScaleConfig
+            return PgVectorScaleConfig
         if self == DB.Redis:
             from .redis.config import RedisConfig
             return RedisConfig
+        if self == DB.MemoryDB:
+            from .memorydb.config import MemoryDBConfig
+            return MemoryDBConfig
         if self == DB.Chroma:
             from .chroma.config import ChromaConfig
@@ -163,6 +181,10 @@ class DB(Enum):
             from .aws_opensearch.config import AWSOpenSearchIndexConfig
             return AWSOpenSearchIndexConfig
+        if self == DB.PgVectorScale:
+            from .pgvectorscale.config import _pgvectorscale_case_config
+            return _pgvectorscale_case_config.get(index_type)
         # DB.Pinecone, DB.Chroma, DB.Redis
         return EmptyDBCaseConfig

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/vectordb_bench/backend/clients/api.py RENAMED Viewed

@@ -15,6 +15,7 @@ class MetricType(str, Enum):
 class IndexType(str, Enum):
     HNSW = "HNSW"
     DISKANN = "DISKANN"
+    STREAMING_DISKANN = "DISKANN"
     IVFFlat = "IVF_FLAT"
     IVFSQ8 = "IVF_SQ8"
     Flat = "FLAT"
@@ -38,6 +39,22 @@ class DBConfig(ABC, BaseModel):
     """
     db_label: str = ""
+    version: str = ""
+    note: str = ""
+    @staticmethod
+    def common_short_configs() -> list[str]:
+        """
+        short input, such as `db_label`, `version`
+        """
+        return ["version", "db_label"]
+    @staticmethod
+    def common_long_configs() -> list[str]:
+        """
+        long input, such as `note`
+        """
+        return ["note"]
     @abstractmethod
     def to_dict(self) -> dict:
@@ -45,7 +62,10 @@ class DBConfig(ABC, BaseModel):
     @validator("*")
     def not_empty_field(cls, v, field):
-        if field.name == "db_label":
+        if (
+            field.name in cls.common_short_configs()
+            or field.name in cls.common_long_configs()
+        ):
             return v
         if not v and isinstance(v, (str, SecretStr)):
             raise ValueError("Empty string!")

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py RENAMED Viewed

@@ -3,7 +3,7 @@ from contextlib import contextmanager
 import time
 from typing import Iterable, Type
 from ..api import VectorDB, DBCaseConfig, DBConfig, IndexType
-from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig
+from .config import AWSOpenSearchConfig, AWSOpenSearchIndexConfig, AWSOS_Engine
 from opensearchpy import OpenSearch
 from opensearchpy.helpers import bulk
@@ -83,7 +83,7 @@ class AWSOpenSearch(VectorDB):
     @contextmanager
     def init(self) -> None:
-        """connect to elasticsearch"""
+        """connect to opensearch"""
         self.client = OpenSearch(**self.db_config)
         yield
@@ -97,7 +97,7 @@ class AWSOpenSearch(VectorDB):
         metadata: list[int],
         **kwargs,
     ) -> tuple[int, Exception]:
-        """Insert the embeddings to the elasticsearch."""
+        """Insert the embeddings to the opensearch."""
         assert self.client is not None, "should self.init() first"
         insert_data = []
@@ -136,13 +136,15 @@ class AWSOpenSearch(VectorDB):
         body = {
             "size": k,
             "query": {"knn": {self.vector_col_name: {"vector": query, "k": k}}},
+            **({"filter": {"range": {self.id_col_name: {"gt": filters["id"]}}}} if filters else {})
         }
         try:
-            resp = self.client.search(index=self.index_name, body=body)
+            resp = self.client.search(index=self.index_name, body=body,size=k,_source=False,docvalue_fields=[self.id_col_name],stored_fields="_none_",filter_path=[f"hits.hits.fields.{self.id_col_name}"],)
             log.info(f'Search took: {resp["took"]}')
             log.info(f'Search shards: {resp["_shards"]}')
             log.info(f'Search hits total: {resp["hits"]["total"]}')
-            result = [int(d["_id"]) for d in resp["hits"]["hits"]]
+            result = [h["fields"][self.id_col_name][0] for h in resp["hits"]["hits"]]
+            #result = [int(d["_id"]) for d in resp["hits"]["hits"]]
             # log.info(f'success! length={len(res)}')
             return result
@@ -152,7 +154,46 @@ class AWSOpenSearch(VectorDB):
     def optimize(self):
         """optimize will be called between insertion and search in performance cases."""
-        pass
+        # Call refresh first to ensure that all segments are created
+        self._refresh_index()
+        self._do_force_merge()
+        # Call refresh again to ensure that the index is ready after force merge.
+        self._refresh_index()
+        # ensure that all graphs are loaded in memory and ready for search
+        self._load_graphs_to_memory()
+    def _refresh_index(self):
+        log.debug(f"Starting refresh for index {self.index_name}")
+        SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
+        while True:
+            try:
+                log.info(f"Starting the Refresh Index..")
+                self.client.indices.refresh(index=self.index_name)
+                break
+            except Exception as e:
+                log.info(
+                    f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
+                time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
+                continue
+        log.debug(f"Completed refresh for index {self.index_name}")
+    def _do_force_merge(self):
+        log.debug(f"Starting force merge for index {self.index_name}")
+        force_merge_endpoint = f'/{self.index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
+        force_merge_task_id = self.client.transport.perform_request('POST', force_merge_endpoint)['task']
+        SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
+        while True:
+            time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
+            task_status = self.client.tasks.get(task_id=force_merge_task_id)
+            if task_status['completed']:
+                break
+        log.debug(f"Completed force merge for index {self.index_name}")
+    def _load_graphs_to_memory(self):
+        if self.case_config.engine != AWSOS_Engine.lucene:
+            log.info("Calling warmup API to load graphs into memory")
+            warmup_endpoint = f'/_plugins/_knn/warmup/{self.index_name}'
+            self.client.transport.perform_request('GET', warmup_endpoint)
     def ready_to_load(self):
         """ready_to_load will be called before load in load cases."""

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/vectordb_bench/backend/clients/aws_opensearch/config.py RENAMED Viewed

@@ -1,9 +1,10 @@
+import logging
 from enum import Enum
 from pydantic import SecretStr, BaseModel
 from ..api import DBConfig, DBCaseConfig, MetricType, IndexType
+log = logging.getLogger(__name__)
 class AWSOpenSearchConfig(DBConfig, BaseModel):
     host: str = ""
     port: int = 443
@@ -31,14 +32,18 @@ class AWSOS_Engine(Enum):
 class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
     metric_type: MetricType = MetricType.L2
-    engine: AWSOS_Engine = AWSOS_Engine.nmslib
-    efConstruction: int = 360
-    M: int = 30
+    engine: AWSOS_Engine = AWSOS_Engine.faiss
+    efConstruction: int = 256
+    efSearch: int = 256
+    M: int = 16
     def parse_metric(self) -> str:
         if self.metric_type == MetricType.IP:
-            return "innerproduct"  # only support faiss / nmslib, not for Lucene.
+            return "innerproduct"
         elif self.metric_type == MetricType.COSINE:
+            if self.engine == AWSOS_Engine.faiss:
+                log.info(f"Using metric type as innerproduct because faiss doesn't support cosine as metric type for Opensearch")
+                return "innerproduct"
             return "cosinesimil"
         return "l2"
@@ -49,7 +54,8 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
             "engine": self.engine.value,
             "parameters": {
                 "ef_construction": self.efConstruction,
-                "m": self.M
+                "m": self.M,
+                "ef_search": self.efSearch
             }
         }
         return params

{vectordb_bench-0.0.12 → vectordb_bench-0.0.14}/vectordb_bench/backend/clients/aws_opensearch/run.py RENAMED Viewed

@@ -40,12 +40,12 @@ def create_index(client, index_name):
                 "type": "knn_vector",
                 "dimension": _DIM,
                 "method": {
-                    "engine": "nmslib",
+                    "engine": "faiss",
                     "name": "hnsw",
                     "space_type": "l2",
                     "parameters": {
-                        "ef_construction": 128,
-                        "m": 24,
+                        "ef_construction": 256,
+                        "m": 16,
                     }
                 }
             }
@@ -108,12 +108,43 @@ def search(client, index_name):
             print('\nSearch not ready, sleep 1s')
             time.sleep(1)
+def optimize_index(client, index_name):
+    print(f"Starting force merge for index {index_name}")
+    force_merge_endpoint = f'/{index_name}/_forcemerge?max_num_segments=1&wait_for_completion=false'
+    force_merge_task_id = client.transport.perform_request('POST', force_merge_endpoint)['task']
+    SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC = 30
+    while True:
+        time.sleep(SECONDS_WAITING_FOR_FORCE_MERGE_API_CALL_SEC)
+        task_status = client.tasks.get(task_id=force_merge_task_id)
+        if task_status['completed']:
+            break
+    print(f"Completed force merge for index {index_name}")
+def refresh_index(client, index_name):
+    print(f"Starting refresh for index {index_name}")
+    SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC = 30
+    while True:
+        try:
+            print(f"Starting the Refresh Index..")
+            client.indices.refresh(index=index_name)
+            break
+        except Exception as e:
+            print(
+                f"Refresh errored out. Sleeping for {SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC} sec and then Retrying : {e}")
+            time.sleep(SECONDS_WAITING_FOR_REFRESH_API_CALL_SEC)
+            continue
+    print(f"Completed refresh for index {index_name}")
 def main():
     client = create_client()
     try:
         create_index(client, _INDEX_NAME)
         bulk_insert(client, _INDEX_NAME)
+        optimize_index(client, _INDEX_NAME)
+        refresh_index(client, _INDEX_NAME)
         search(client, _INDEX_NAME)
         delete_index(client, _INDEX_NAME)
     except Exception as e:

vectordb_bench-0.0.14/vectordb_bench/backend/clients/memorydb/cli.py ADDED Viewed

@@ -0,0 +1,88 @@
+from typing import Annotated, TypedDict, Unpack
+import click
+from pydantic import SecretStr
+from ....cli.cli import (
+    CommonTypedDict,
+    HNSWFlavor2,
+    cli,
+    click_parameter_decorators_from_typed_dict,
+    run,
+)
+from .. import DB
+class MemoryDBTypedDict(TypedDict):
+    host: Annotated[
+        str, click.option("--host", type=str, help="Db host", required=True)
+    ]
+    password: Annotated[str, click.option("--password", type=str, help="Db password")]
+    port: Annotated[int, click.option("--port", type=int, default=6379, help="Db Port")]
+    ssl: Annotated[
+        bool,
+        click.option(
+            "--ssl/--no-ssl",
+            is_flag=True,
+            show_default=True,
+            default=True,
+            help="Enable or disable SSL for MemoryDB",
+        ),
+    ]
+    ssl_ca_certs: Annotated[
+        str,
+        click.option(
+            "--ssl-ca-certs",
+            show_default=True,
+            help="Path to certificate authority file to use for SSL",
+        ),
+    ]
+    cmd: Annotated[
+        bool,
+        click.option(
+            "--cmd",
+            is_flag=True,
+            show_default=True,
+            default=False,
+            help="Cluster Mode Disabled (CMD), use this flag when testing locally on a single node instance. In production, MemoryDB only supports cluster mode (CME)",
+        ),
+    ]
+    insert_batch_size: Annotated[
+        int,
+        click.option(
+            "--insert-batch-size",
+            type=int,
+            default=10,
+            help="Batch size for inserting data. Adjust this as needed, but don't make it too big",
+        ),
+    ]
+class MemoryDBHNSWTypedDict(CommonTypedDict, MemoryDBTypedDict, HNSWFlavor2):
+    ...
+@cli.command()
+@click_parameter_decorators_from_typed_dict(MemoryDBHNSWTypedDict)
+def MemoryDB(**parameters: Unpack[MemoryDBHNSWTypedDict]):
+    from .config import MemoryDBConfig, MemoryDBHNSWConfig
+    run(
+        db=DB.MemoryDB,
+        db_config=MemoryDBConfig(
+            db_label=parameters["db_label"],
+            password=SecretStr(parameters["password"]) if parameters["password"] else None,
+            host=SecretStr(parameters["host"]),
+            port=parameters["port"],
+            ssl=parameters["ssl"],
+            ssl_ca_certs=parameters["ssl_ca_certs"],
+            cmd=parameters["cmd"],
+        ),
+        db_case_config=MemoryDBHNSWConfig(
+            M=parameters["m"],
+            ef_construction=parameters["ef_construction"],
+            ef_runtime=parameters["ef_runtime"],
+            insert_batch_size=parameters["insert_batch_size"]
+        ),
+        **parameters,
+    )

vectordb_bench-0.0.14/vectordb_bench/backend/clients/memorydb/config.py ADDED Viewed

@@ -0,0 +1,54 @@
+from pydantic import BaseModel, SecretStr
+from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
+class MemoryDBConfig(DBConfig):
+    host: SecretStr
+    password: SecretStr | None = None
+    port: int | None = None
+    ssl: bool | None = None
+    cmd: bool | None = None
+    ssl_ca_certs: str | None = None
+    def to_dict(self) -> dict:
+        return {
+            "host": self.host.get_secret_value(),
+            "port": self.port,
+            "password": self.password.get_secret_value() if self.password else None,
+            "ssl": self.ssl,
+            "cmd": self.cmd,
+            "ssl_ca_certs": self.ssl_ca_certs,
+        }
+class MemoryDBIndexConfig(BaseModel, DBCaseConfig):
+    metric_type: MetricType | None = None
+    insert_batch_size: int | None = None
+    def parse_metric(self) -> str:
+        if self.metric_type == MetricType.L2:
+            return "l2"
+        elif self.metric_type == MetricType.IP:
+            return "ip"
+        return "cosine"
+class MemoryDBHNSWConfig(MemoryDBIndexConfig):
+    M: int | None = 16
+    ef_construction: int | None = 64
+    ef_runtime: int | None = 10
+    index: IndexType = IndexType.HNSW
+    def index_param(self) -> dict:
+        return {
+            "metric": self.parse_metric(),
+            "index_type": self.index.value,
+            "m": self.M,
+            "ef_construction": self.ef_construction,
+        }
+    def search_param(self) -> dict:
+        return {
+            "ef_runtime": self.ef_runtime,
+        }

vectordb-bench 0.0.12__tar.gz → 0.0.14__tar.gz

vectordb-bench 0.0.12 (tar.gz) → 0.0.14 (tar.gz)