PyPI - vectordb-bench - Versions diffs - 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl - Mend

vectordb-bench 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

vectordb_bench/backend/clients/pgvector/cli.py CHANGED Viewed

@@ -4,6 +4,8 @@ import click
 import os
 from pydantic import SecretStr
+from vectordb_bench.backend.clients.api import MetricType
 from ....cli.cli import (
     CommonTypedDict,
     HNSWFlavor1,
@@ -16,6 +18,14 @@ from ....cli.cli import (
 from vectordb_bench.backend.clients import DB
+def set_default_quantized_fetch_limit(ctx, param, value):
+    if ctx.params.get("reranking") and value is None:
+        # ef_search is the default value for quantized_fetch_limit as it's bound by ef_search.
+        # 100 is default value for quantized_fetch_limit for IVFFlat.
+        default_value = ctx.params["ef_search"] if ctx.command.name == "pgvectorhnsw" else 100
+        return default_value
+    return value
 class PgVectorTypedDict(CommonTypedDict):
     user_name: Annotated[
         str, click.option("--user-name", type=str, help="Db username", required=True)
@@ -61,11 +71,45 @@ class PgVectorTypedDict(CommonTypedDict):
         Optional[str],
         click.option(
             "--quantization-type",
-            type=click.Choice(["none", "halfvec"]),
+            type=click.Choice(["none", "bit", "halfvec"]),
             help="quantization type for vectors",
             required=False,
         ),
     ]
+    reranking: Annotated[
+        Optional[bool],
+        click.option(
+            "--reranking/--skip-reranking",
+            type=bool,
+            help="Enable reranking for HNSW search for binary quantization",
+            default=False,
+        ),
+    ]
+    reranking_metric: Annotated[
+        Optional[str],
+        click.option(
+            "--reranking-metric",
+            type=click.Choice(
+                [metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD"]]
+            ),
+            help="Distance metric for reranking",
+            default="COSINE",
+            show_default=True,
+        ),
+    ]
+    quantized_fetch_limit: Annotated[
+        Optional[int],
+        click.option(
+            "--quantized-fetch-limit",
+            type=int,
+            help="Limit of fetching quantized vector ranked by distance for reranking \
+                -- bound by ef_search",
+            required=False,
+            callback=set_default_quantized_fetch_limit,
+        )
+    ]
 class PgVectorIVFFlatTypedDict(PgVectorTypedDict, IVFFlatTypedDict):
     ...
@@ -93,6 +137,9 @@ def PgVectorIVFFlat(
             lists=parameters["lists"],
             probes=parameters["probes"],
             quantization_type=parameters["quantization_type"],
+            reranking=parameters["reranking"],
+            reranking_metric=parameters["reranking_metric"],
+            quantized_fetch_limit=parameters["quantized_fetch_limit"],
         ),
         **parameters,
     )
@@ -126,6 +173,9 @@ def PgVectorHNSW(
             maintenance_work_mem=parameters["maintenance_work_mem"],
             max_parallel_workers=parameters["max_parallel_workers"],
             quantization_type=parameters["quantization_type"],
+            reranking=parameters["reranking"],
+            reranking_metric=parameters["reranking_metric"],
+            quantized_fetch_limit=parameters["quantized_fetch_limit"],
         ),
         **parameters,
     )

vectordb_bench/backend/clients/pgvector/config.py CHANGED Viewed

@@ -65,6 +65,10 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
             elif self.metric_type == MetricType.IP:
                 return "halfvec_ip_ops"
             return "halfvec_cosine_ops"
+        elif self.quantization_type == "bit":
+            if self.metric_type == MetricType.JACCARD:
+                return "bit_jaccard_ops"
+            return "bit_hamming_ops"
         else:
             if self.metric_type == MetricType.L2:
                 return "vector_l2_ops"
@@ -73,11 +77,16 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
             return "vector_cosine_ops"
     def parse_metric_fun_op(self) -> LiteralString:
-        if self.metric_type == MetricType.L2:
-            return "<->"
-        elif self.metric_type == MetricType.IP:
-            return "<#>"
-        return "<=>"
+        if self.quantization_type == "bit":
+            if self.metric_type == MetricType.JACCARD:
+                return "<%>"
+            return "<~>"
+        else:
+            if self.metric_type == MetricType.L2:
+                return "<->"
+            elif self.metric_type == MetricType.IP:
+                return "<#>"
+            return "<=>"
     def parse_metric_fun_str(self) -> str:
         if self.metric_type == MetricType.L2:
@@ -85,6 +94,14 @@ class PgVectorIndexConfig(BaseModel, DBCaseConfig):
         elif self.metric_type == MetricType.IP:
             return "max_inner_product"
         return "cosine_distance"
+    def parse_reranking_metric_fun_op(self) -> LiteralString:
+        if self.reranking_metric == MetricType.L2:
+            return "<->"
+        elif self.reranking_metric == MetricType.IP:
+            return "<#>"
+        return "<=>"
     @abstractmethod
     def index_param(self) -> PgVectorIndexParam:
@@ -151,6 +168,9 @@ class PgVectorIVFFlatConfig(PgVectorIndexConfig):
     maintenance_work_mem: Optional[str] = None
     max_parallel_workers: Optional[int] = None
     quantization_type: Optional[str] = None
+    reranking: Optional[bool] = None
+    quantized_fetch_limit: Optional[int] = None
+    reranking_metric: Optional[str] = None
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"lists": self.lists}
@@ -170,6 +190,9 @@ class PgVectorIVFFlatConfig(PgVectorIndexConfig):
     def search_param(self) -> PgVectorSearchParam:
         return {
             "metric_fun_op": self.parse_metric_fun_op(),
+            "reranking": self.reranking,
+            "reranking_metric_fun_op": self.parse_reranking_metric_fun_op(),
+            "quantized_fetch_limit": self.quantized_fetch_limit,
         }
     def session_param(self) -> PgVectorSessionCommands:
@@ -195,6 +218,9 @@ class PgVectorHNSWConfig(PgVectorIndexConfig):
     maintenance_work_mem: Optional[str] = None
     max_parallel_workers: Optional[int] = None
     quantization_type: Optional[str] = None
+    reranking: Optional[bool] = None
+    quantized_fetch_limit: Optional[int] = None
+    reranking_metric: Optional[str] = None
     def index_param(self) -> PgVectorIndexParam:
         index_parameters = {"m": self.m, "ef_construction": self.ef_construction}
@@ -214,6 +240,9 @@ class PgVectorHNSWConfig(PgVectorIndexConfig):
     def search_param(self) -> PgVectorSearchParam:
         return {
             "metric_fun_op": self.parse_metric_fun_op(),
+            "reranking": self.reranking,
+            "reranking_metric_fun_op": self.parse_reranking_metric_fun_op(),
+            "quantized_fetch_limit": self.quantized_fetch_limit,
         }
     def session_param(self) -> PgVectorSessionCommands:

vectordb_bench/backend/clients/pgvector/pgvector.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pgvector.psycopg import register_vector
 from psycopg import Connection, Cursor, sql
 from ..api import VectorDB
-from .config import PgVectorConfigDict, PgVectorIndexConfig
+from .config import PgVectorConfigDict, PgVectorIndexConfig, PgVectorHNSWConfig
 log = logging.getLogger(__name__)
@@ -87,6 +87,92 @@ class PgVector(VectorDB):
         assert cursor is not None, "Cursor is not initialized"
         return conn, cursor
+    def _generate_search_query(self, filtered: bool=False) -> sql.Composed:
+        index_param = self.case_config.index_param()
+        reranking = self.case_config.search_param()["reranking"]
+        column_name = (
+            sql.SQL("binary_quantize({0})").format(sql.Identifier("embedding"))
+            if index_param["quantization_type"] == "bit"
+            else sql.SQL("embedding")
+        )
+        search_vector = (
+            sql.SQL("binary_quantize({0})").format(sql.Placeholder())
+            if index_param["quantization_type"] == "bit"
+            else sql.Placeholder()
+        )
+        # The following sections assume that the quantization_type value matches the quantization function name
+        if index_param["quantization_type"] != None:
+            if index_param["quantization_type"] == "bit" and reranking:
+                # Embeddings needs to be passed to binary_quantize function if quantization_type is bit
+                search_query = sql.Composed(
+                    [
+                        sql.SQL(
+                            """
+                            SELECT i.id
+                            FROM (
+                                SELECT id, embedding {reranking_metric_fun_op} %s::vector AS distance
+                                FROM public.{table_name} {where_clause}
+                                ORDER BY {column_name}::{quantization_type}({dim})
+                            """
+                        ).format(
+                            table_name=sql.Identifier(self.table_name),
+                            column_name=column_name,
+                            reranking_metric_fun_op=sql.SQL(self.case_config.search_param()["reranking_metric_fun_op"]),
+                            quantization_type=sql.SQL(index_param["quantization_type"]),
+                            dim=sql.Literal(self.dim),
+                            where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
+                        ),
+                        sql.SQL(self.case_config.search_param()["metric_fun_op"]),
+                        sql.SQL(
+                            """
+                                {search_vector}
+                                LIMIT {quantized_fetch_limit}
+                            ) i
+                            ORDER BY i.distance
+                            LIMIT %s::int
+                            """
+                        ).format(
+                            search_vector=search_vector,
+                            quantized_fetch_limit=sql.Literal(
+                                self.case_config.search_param()["quantized_fetch_limit"]
+                            ),
+                        ),
+                    ]
+                )
+            else:
+                search_query = sql.Composed(
+                    [
+                        sql.SQL(
+                            "SELECT id FROM public.{table_name} {where_clause} ORDER BY {column_name}::{quantization_type}({dim}) "
+                        ).format(
+                            table_name=sql.Identifier(self.table_name),
+                            column_name=column_name,
+                            quantization_type=sql.SQL(index_param["quantization_type"]),
+                            dim=sql.Literal(self.dim),
+                            where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
+                        ),
+                        sql.SQL(self.case_config.search_param()["metric_fun_op"]),
+                        sql.SQL(" {search_vector} LIMIT %s::int").format(search_vector=search_vector),
+                    ]
+                )
+        else:
+            search_query = sql.Composed(
+                [
+                    sql.SQL(
+                        "SELECT id FROM public.{table_name} {where_clause} ORDER BY embedding "
+                    ).format(
+                        table_name=sql.Identifier(self.table_name),
+                        where_clause=sql.SQL("WHERE id >= %s") if filtered else sql.SQL(""),
+                    ),
+                    sql.SQL(self.case_config.search_param()["metric_fun_op"]),
+                    sql.SQL(" %s::vector LIMIT %s::int"),
+                ]
+            )
+        return search_query
     @contextmanager
     def init(self) -> Generator[None, None, None]:
@@ -112,63 +198,8 @@ class PgVector(VectorDB):
                 self.cursor.execute(command)
             self.conn.commit()
-        index_param = self.case_config.index_param()
-        # The following sections assume that the quantization_type value matches the quantization function name
-        if index_param["quantization_type"] != None:
-            self._filtered_search = sql.Composed(
-                [
-                    sql.SQL(
-                        "SELECT id FROM public.{table_name} WHERE id >= %s ORDER BY embedding::{quantization_type}({dim}) "
-                    ).format(
-                        table_name=sql.Identifier(self.table_name),
-                        quantization_type=sql.SQL(index_param["quantization_type"]),
-                        dim=sql.Literal(self.dim),
-                    ),
-                    sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                    sql.SQL(" %s::{quantization_type}({dim}) LIMIT %s::int").format(
-                        quantization_type=sql.SQL(index_param["quantization_type"]),
-                        dim=sql.Literal(self.dim),
-                    ),
-                ]
-            )
-        else:
-            self._filtered_search = sql.Composed(
-                [
-                    sql.SQL(
-                        "SELECT id FROM public.{table_name} WHERE id >= %s ORDER BY embedding "
-                        ).format(table_name=sql.Identifier(self.table_name)),
-                    sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                    sql.SQL(" %s::vector LIMIT %s::int"),
-                ]
-            )
-        if index_param["quantization_type"] != None:
-            self._unfiltered_search = sql.Composed(
-                [
-                    sql.SQL(
-                        "SELECT id FROM public.{table_name} ORDER BY embedding::{quantization_type}({dim}) "
-                    ).format(
-                        table_name=sql.Identifier(self.table_name),
-                        quantization_type=sql.SQL(index_param["quantization_type"]),
-                        dim=sql.Literal(self.dim),
-                    ),
-                    sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                    sql.SQL(" %s::{quantization_type}({dim}) LIMIT %s::int").format(
-                        quantization_type=sql.SQL(index_param["quantization_type"]),
-                        dim=sql.Literal(self.dim),
-                    ),
-                ]
-            )
-        else:
-            self._unfiltered_search = sql.Composed(
-                [
-                    sql.SQL("SELECT id FROM public.{} ORDER BY embedding ").format(
-                        sql.Identifier(self.table_name)
-                    ),
-                    sql.SQL(self.case_config.search_param()["metric_fun_op"]),
-                    sql.SQL(" %s::vector LIMIT %s::int"),
-                ]
-            )
+        self._filtered_search = self._generate_search_query(filtered=True)
+        self._unfiltered_search = self._generate_search_query()
         try:
             yield
@@ -306,12 +337,17 @@ class PgVector(VectorDB):
         if index_param["quantization_type"] != None:
             index_create_sql = sql.SQL(
                 """
-                CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
-                USING {index_type} ((embedding::{quantization_type}({dim})) {embedding_metric})
+                CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
+                USING {index_type} (({column_name}::{quantization_type}({dim})) {embedding_metric})
                 """
             ).format(
                 index_name=sql.Identifier(self._index_name),
                 table_name=sql.Identifier(self.table_name),
+                column_name=(
+                    sql.SQL("binary_quantize({0})").format(sql.Identifier("embedding"))
+                    if index_param["quantization_type"] == "bit"
+                    else sql.Identifier("embedding")
+                ),
                 index_type=sql.Identifier(index_param["index_type"]),
                 # This assumes that the quantization_type value matches the quantization function name
                 quantization_type=sql.SQL(index_param["quantization_type"]),
@@ -406,15 +442,28 @@ class PgVector(VectorDB):
         assert self.conn is not None, "Connection is not initialized"
         assert self.cursor is not None, "Cursor is not initialized"
+        index_param = self.case_config.index_param()
+        search_param = self.case_config.search_param()
         q = np.asarray(query)
         if filters:
             gt = filters.get("id")
-            result = self.cursor.execute(
+            if index_param["quantization_type"] == "bit" and search_param["reranking"]:
+                result = self.cursor.execute(
+                    self._filtered_search, (q, gt, q, k), prepare=True, binary=True
+                )
+            else:
+                result = self.cursor.execute(
                     self._filtered_search, (gt, q, k), prepare=True, binary=True
-                    )
+                )
         else:
-            result = self.cursor.execute(
+            if index_param["quantization_type"] == "bit" and search_param["reranking"]:
+                result = self.cursor.execute(
+                    self._unfiltered_search, (q, q, k), prepare=True, binary=True
+                )
+            else:
+                result = self.cursor.execute(
                     self._unfiltered_search, (q, k), prepare=True, binary=True
-                    )
+                )
         return [int(i[0]) for i in result.fetchall()]

vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py CHANGED Viewed

@@ -23,7 +23,7 @@ class WeaviateCloud(VectorDB):
         **kwargs,
     ):
         """Initialize wrapper around the weaviate vector database."""
-        db_config.update("auth_client_secret", weaviate.AuthApiKey(api_key=db_config.get("auth_client_secret")))
+        db_config.update({"auth_client_secret": weaviate.AuthApiKey(api_key=db_config.get("auth_client_secret"))})
         self.db_config = db_config
         self.case_config = db_case_config
         self.collection_name = collection_name

vectordb_bench/cli/cli.py CHANGED Viewed

@@ -414,7 +414,7 @@ class HNSWBaseRequiredTypedDict(TypedDict):
 class HNSWFlavor1(HNSWBaseTypedDict):
     ef_search: Annotated[
-        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search")
+        Optional[int], click.option("--ef-search", type=int, help="hnsw ef-search", is_eager=True)
     ]
@@ -479,7 +479,7 @@ def run(
                 concurrency_duration=parameters["concurrency_duration"],
                 num_concurrency=[int(s) for s in parameters["num_concurrency"]],
             ),
-            custom_case=parameters.get("custom_case", {}),
+            custom_case=get_custom_case_config(parameters),
         ),
         stages=parse_task_stages(
             (

vectordb_bench/cli/vectordbbench.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from ..backend.clients.pgvector.cli import PgVectorHNSW
 from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
 from ..backend.clients.pgvectorscale.cli import PgVectorScaleDiskAnn
+from ..backend.clients.pgdiskann.cli import PgDiskAnn
 from ..backend.clients.redis.cli import Redis
 from ..backend.clients.memorydb.cli import MemoryDB
 from ..backend.clients.test.cli import Test
@@ -22,6 +23,7 @@ cli.add_command(ZillizAutoIndex)
 cli.add_command(MilvusAutoIndex)
 cli.add_command(AWSOpenSearch)
 cli.add_command(PgVectorScaleDiskAnn)
+cli.add_command(PgDiskAnn)
 if __name__ == "__main__":

vectordb_bench/frontend/components/run_test/caseSelector.py CHANGED Viewed

@@ -110,6 +110,12 @@ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, active
                         value=config.inputConfig["value"],
                         help=config.inputHelp,
                     )
+                elif config.inputType == InputType.Bool:
+                    caseConfig[config.label] = column.checkbox(
+                        config.displayLabel if config.displayLabel else config.label.value,
+                        value=config.inputConfig["value"],
+                        help=config.inputHelp,
+                    )
                 k += 1
         if k == 0:
             columns[1].write("Auto")

vectordb_bench/frontend/config/dbCaseConfigs.py CHANGED Viewed

@@ -3,7 +3,7 @@ import typing
 from pydantic import BaseModel
 from vectordb_bench.backend.cases import CaseLabel, CaseType
 from vectordb_bench.backend.clients import DB
-from vectordb_bench.backend.clients.api import IndexType
+from vectordb_bench.backend.clients.api import IndexType, MetricType
 from vectordb_bench.frontend.components.custom.getCustomConfig import get_custom_configs
 from vectordb_bench.models import CaseConfig, CaseConfigParamType
@@ -149,6 +149,7 @@ class InputType(IntEnum):
     Number = 20002
     Option = 20003
     Float = 20004
+    Bool = 20005
 class CaseConfigInput(BaseModel):
@@ -180,6 +181,16 @@ CaseConfigParamInput_IndexType = CaseConfigInput(
     },
 )
+CaseConfigParamInput_IndexType_PgDiskANN = CaseConfigInput(
+    label=CaseConfigParamType.IndexType,
+    inputHelp="Select Index Type",
+    inputType=InputType.Option,
+    inputConfig={
+        "options": [
+            IndexType.DISKANN.value,
+        ],
+    },
+)
 CaseConfigParamInput_IndexType_PgVectorScale = CaseConfigInput(
     label=CaseConfigParamType.IndexType,
@@ -205,6 +216,42 @@ CaseConfigParamInput_storage_layout = CaseConfigInput(
     },
 )
+CaseConfigParamInput_max_neighbors = CaseConfigInput(
+    label=CaseConfigParamType.max_neighbors,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 10,
+        "max": 300,
+        "value": 32,
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    == IndexType.DISKANN.value,
+)
+CaseConfigParamInput_l_value_ib = CaseConfigInput(
+    label=CaseConfigParamType.l_value_ib,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 10,
+        "max": 300,
+        "value": 50,
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    == IndexType.DISKANN.value,
+)
+CaseConfigParamInput_l_value_is = CaseConfigInput(
+    label=CaseConfigParamType.l_value_is,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 10,
+        "max": 300,
+        "value": 40,
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
+    == IndexType.DISKANN.value,
+)
 CaseConfigParamInput_num_neighbors = CaseConfigInput(
     label=CaseConfigParamType.num_neighbors,
     inputType=InputType.Number,
@@ -773,7 +820,7 @@ CaseConfigParamInput_QuantizationType_PgVector = CaseConfigInput(
     label=CaseConfigParamType.quantizationType,
     inputType=InputType.Option,
     inputConfig={
-        "options": ["none", "halfvec"],
+        "options": ["none", "bit", "halfvec"],
     },
     isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
     in [
@@ -819,6 +866,46 @@ CaseConfigParamInput_ZillizLevel = CaseConfigInput(
     },
 )
+CaseConfigParamInput_reranking_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.reranking,
+    inputType=InputType.Bool,
+    displayLabel="Enable Reranking",
+    inputHelp="Enable if you want to use reranking while performing \
+        similarity search in binary quantization",
+    inputConfig={
+        "value": False,
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
+    == "bit"
+)
+CaseConfigParamInput_quantized_fetch_limit_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.quantizedFetchLimit,
+    displayLabel="Quantized vector fetch limit",
+    inputHelp="Limit top-k vectors using the quantized vector comparison --bound by ef_search",
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 20,
+        "max": 1000,
+        "value": 200,
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
+    == "bit" and config.get(CaseConfigParamType.reranking, False)
+)
+CaseConfigParamInput_reranking_metric_PgVector = CaseConfigInput(
+    label=CaseConfigParamType.rerankingMetric,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": [
+            metric.value for metric in MetricType if metric.value not in ["HAMMING", "JACCARD"]
+        ],
+    },
+    isDisplayed=lambda config: config.get(CaseConfigParamType.quantizationType, None)
+    == "bit" and config.get(CaseConfigParamType.reranking, False)
+)
 MilvusLoadConfig = [
     CaseConfigParamInput_IndexType,
     CaseConfigParamInput_M,
@@ -896,6 +983,9 @@ PgVectorPerformanceConfig = [
     CaseConfigParamInput_QuantizationType_PgVector,
     CaseConfigParamInput_maintenance_work_mem_PgVector,
     CaseConfigParamInput_max_parallel_workers_PgVector,
+    CaseConfigParamInput_reranking_PgVector,
+    CaseConfigParamInput_reranking_metric_PgVector,
+    CaseConfigParamInput_quantized_fetch_limit_PgVector,
 ]
 PgVectoRSLoadingConfig = [
@@ -942,6 +1032,19 @@ PgVectorScalePerformanceConfig = [
     CaseConfigParamInput_query_search_list_size,
 ]
+PgDiskANNLoadConfig = [
+    CaseConfigParamInput_IndexType_PgDiskANN,
+    CaseConfigParamInput_max_neighbors,
+    CaseConfigParamInput_l_value_ib,
+]
+PgDiskANNPerformanceConfig = [
+    CaseConfigParamInput_IndexType_PgDiskANN,
+    CaseConfigParamInput_max_neighbors,
+    CaseConfigParamInput_l_value_ib,
+    CaseConfigParamInput_l_value_is,
+]
 CASE_CONFIG_MAP = {
     DB.Milvus: {
         CaseLabel.Load: MilvusLoadConfig,
@@ -974,4 +1077,8 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: PgVectorScaleLoadingConfig,
         CaseLabel.Performance: PgVectorScalePerformanceConfig,
     },
+    DB.PgDiskANN: {
+        CaseLabel.Load: PgDiskANNLoadConfig,
+        CaseLabel.Performance: PgDiskANNPerformanceConfig,
+    },
 }

vectordb_bench/models.py CHANGED Viewed

@@ -47,6 +47,9 @@ class CaseConfigParamType(Enum):
     probes = "probes"
     quantizationType = "quantization_type"
     quantizationRatio = "quantization_ratio"
+    reranking = "reranking"
+    rerankingMetric = "reranking_metric"
+    quantizedFetchLimit = "quantized_fetch_limit"
     m = "m"
     nbits = "nbits"
     intermediate_graph_degree = "intermediate_graph_degree"
@@ -64,6 +67,9 @@ class CaseConfigParamType(Enum):
     max_parallel_workers = "max_parallel_workers"
     storage_layout = "storage_layout"
     num_neighbors = "num_neighbors"
+    max_neighbors = "max_neighbors"
+    l_value_ib = "l_value_ib"
+    l_value_is = "l_value_is"
     search_list_size = "search_list_size"
     max_alpha = "max_alpha"
     num_dimensions = "num_dimensions"

vectordb_bench/results/getLeaderboardData.py CHANGED Viewed

@@ -2,6 +2,7 @@ from vectordb_bench import config
 import ujson
 import pathlib
 from vectordb_bench.backend.cases import CaseType
+from vectordb_bench.backend.clients import DB
 from vectordb_bench.frontend.config.dbPrices import DB_DBLABEL_TO_PRICE
 from vectordb_bench.interface import benchMarkRunner
 from vectordb_bench.models import ResultLabel, TestResult
@@ -45,6 +46,8 @@ def main():
             for d in test_result.results
             if d.task_config.case_config.case_id != CaseType.CapacityDim128
             and d.task_config.case_config.case_id != CaseType.CapacityDim960
+            if d.task_config.db != DB.ZillizCloud
+            or test_result.timestamp >= datetime(2024, 1, 1).timestamp()
         ]
         # compute qp$

vectordb-bench 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

vectordb-bench 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl