PyPI - vectordb-bench - Versions diffs - 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl - Mend

vectordb-bench 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py ADDED Viewed

@@ -0,0 +1,272 @@
+"""Wrapper around the Pgvectorscale vector database over VectorDB"""
+import logging
+import pprint
+from contextlib import contextmanager
+from typing import Any, Generator, Optional, Tuple
+import numpy as np
+import psycopg
+from pgvector.psycopg import register_vector
+from psycopg import Connection, Cursor, sql
+from ..api import VectorDB
+from .config import PgVectorScaleConfigDict, PgVectorScaleIndexConfig
+log = logging.getLogger(__name__)
+class PgVectorScale(VectorDB):
+    """Use psycopg instructions"""
+    conn: psycopg.Connection[Any] | None = None
+    coursor: psycopg.Cursor[Any] | None = None
+    def __init__(
+        self,
+        dim: int,
+        db_config: PgVectorScaleConfigDict,
+        db_case_config: PgVectorScaleIndexConfig,
+        collection_name: str = "pg_vectorscale_collection",
+        drop_old: bool = False,
+        **kwargs,
+    ):
+        self.name = "PgVectorScale"
+        self.db_config = db_config
+        self.case_config = db_case_config
+        self.table_name = collection_name
+        self.dim = dim
+        self._index_name = "pgvectorscale_index"
+        self._primary_field = "id"
+        self._vector_field = "embedding"
+        self.conn, self.cursor = self._create_connection(**self.db_config)
+        log.info(f"{self.name} config values: {self.db_config}\n{self.case_config}")
+        if not any(
+            (
+                self.case_config.create_index_before_load,
+                self.case_config.create_index_after_load,
+            )
+        ):
+            err = f"{self.name} config must create an index using create_index_before_load or create_index_after_load"
+            log.error(err)
+            raise RuntimeError(
+                f"{err}\n{pprint.pformat(self.db_config)}\n{pprint.pformat(self.case_config)}"
+            )
+        if drop_old:
+            self._drop_index()
+            self._drop_table()
+            self._create_table(dim)
+            if self.case_config.create_index_before_load:
+                self._create_index()
+        self.cursor.close()
+        self.conn.close()
+        self.cursor = None
+        self.conn = None
+    @staticmethod
+    def _create_connection(**kwargs) -> Tuple[Connection, Cursor]:
+        conn = psycopg.connect(**kwargs)
+        conn.cursor().execute("CREATE EXTENSION IF NOT EXISTS vectorscale CASCADE")
+        conn.commit()
+        register_vector(conn)
+        conn.autocommit = False
+        cursor = conn.cursor()
+        assert conn is not None, "Connection is not initialized"
+        assert cursor is not None, "Cursor is not initialized"
+        return conn, cursor
+    @contextmanager
+    def init(self) -> Generator[None, None, None]:
+        self.conn, self.cursor = self._create_connection(**self.db_config)
+        # index configuration may have commands defined that we should set during each client session
+        session_options: dict[str, Any] = self.case_config.session_param()
+        if len(session_options) > 0:
+            for setting_name, setting_val in session_options.items():
+                command = sql.SQL("SET {setting_name} " + "= {setting_val};").format(
+                    setting_name=sql.Identifier(setting_name),
+                    setting_val=sql.Identifier(str(setting_val)),
+                )
+                log.debug(command.as_string(self.cursor))
+                self.cursor.execute(command)
+            self.conn.commit()
+        self._unfiltered_search = sql.Composed(
+            [
+                sql.SQL("SELECT id FROM public.{} ORDER BY embedding ").format(
+                    sql.Identifier(self.table_name)
+                ),
+                sql.SQL(self.case_config.search_param()["metric_fun_op"]),
+                sql.SQL(" %s::vector LIMIT %s::int"),
+            ]
+        )
+        try:
+            yield
+        finally:
+            self.cursor.close()
+            self.conn.close()
+            self.cursor = None
+            self.conn = None
+    def _drop_table(self):
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        log.info(f"{self.name} client drop table : {self.table_name}")
+        self.cursor.execute(
+            sql.SQL("DROP TABLE IF EXISTS public.{table_name}").format(
+                table_name=sql.Identifier(self.table_name)
+            )
+        )
+        self.conn.commit()
+    def ready_to_load(self):
+        pass
+    def optimize(self):
+        self._post_insert()
+    def _post_insert(self):
+        log.info(f"{self.name} post insert before optimize")
+        if self.case_config.create_index_after_load:
+            self._drop_index()
+            self._create_index()
+    def _drop_index(self):
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        log.info(f"{self.name} client drop index : {self._index_name}")
+        drop_index_sql = sql.SQL("DROP INDEX IF EXISTS {index_name}").format(
+            index_name=sql.Identifier(self._index_name)
+        )
+        log.debug(drop_index_sql.as_string(self.cursor))
+        self.cursor.execute(drop_index_sql)
+        self.conn.commit()
+    def _create_index(self):
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        log.info(f"{self.name} client create index : {self._index_name}")
+        index_param: dict[str, Any] = self.case_config.index_param()
+        options = []
+        for option_name, option_val in index_param["options"].items():
+            if option_val is not None:
+                options.append(
+                    sql.SQL("{option_name} = {val}").format(
+                        option_name=sql.Identifier(option_name),
+                        val=sql.Identifier(str(option_val)),
+                    )
+                )
+        num_bits_per_dimension = "2" if self.dim < 900 else "1"
+        options.append(
+            sql.SQL("{option_name} = {val}").format(
+                option_name=sql.Identifier("num_bits_per_dimension"),
+                val=sql.Identifier(num_bits_per_dimension),
+            )
+        )
+        if any(options):
+            with_clause = sql.SQL("WITH ({});").format(sql.SQL(", ").join(options))
+        else:
+            with_clause = sql.Composed(())
+        index_create_sql = sql.SQL(
+            """
+            CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
+            USING {index_type} (embedding {embedding_metric})
+            """
+        ).format(
+            index_name=sql.Identifier(self._index_name),
+            table_name=sql.Identifier(self.table_name),
+            index_type=sql.Identifier(index_param["index_type"].lower()),
+            embedding_metric=sql.Identifier(index_param["metric"]),
+        )
+        index_create_sql_with_with_clause = (
+            index_create_sql + with_clause
+        ).join(" ")
+        log.debug(index_create_sql_with_with_clause.as_string(self.cursor))
+        self.cursor.execute(index_create_sql_with_with_clause)
+        self.conn.commit()
+    def _create_table(self, dim: int):
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        try:
+            log.info(f"{self.name} client create table : {self.table_name}")
+            self.cursor.execute(
+                sql.SQL(
+                    "CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));"
+                ).format(table_name=sql.Identifier(self.table_name), dim=dim)
+            )
+            self.conn.commit()
+        except Exception as e:
+            log.warning(
+                f"Failed to create pgvectorscale table: {self.table_name} error: {e}"
+            )
+            raise e from None
+    def insert_embeddings(
+        self,
+        embeddings: list[list[float]],
+        metadata: list[int],
+        **kwargs: Any,
+    ) -> Tuple[int, Optional[Exception]]:
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        try:
+            metadata_arr = np.array(metadata)
+            embeddings_arr = np.array(embeddings)
+            with self.cursor.copy(
+                sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
+                    table_name=sql.Identifier(self.table_name)
+                )
+            ) as copy:
+                copy.set_types(["bigint", "vector"])
+                for i, row in enumerate(metadata_arr):
+                    copy.write_row((row, embeddings_arr[i]))
+            self.conn.commit()
+            if kwargs.get("last_batch"):
+                self._post_insert()
+            return len(metadata), None
+        except Exception as e:
+            log.warning(
+                f"Failed to insert data into pgvector table ({self.table_name}), error: {e}"
+            )
+            return 0, e
+    def search_embedding(
+        self,
+        query: list[float],
+        k: int = 100,
+        filters: dict | None = None,
+        timeout: int | None = None,
+    ) -> list[int]:
+        assert self.conn is not None, "Connection is not initialized"
+        assert self.cursor is not None, "Cursor is not initialized"
+        q = np.asarray(query)
+        # TODO add filters support
+        result = self.cursor.execute(
+            self._unfiltered_search, (q, k), prepare=True, binary=True
+        )
+        return [int(i[0]) for i in result.fetchall()]

vectordb_bench/cli/vectordbbench.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from ..backend.clients.pgvector.cli import PgVectorHNSW
+from ..backend.clients.pgvecto_rs.cli import PgVectoRSHNSW, PgVectoRSIVFFlat
 from ..backend.clients.redis.cli import Redis
+from ..backend.clients.memorydb.cli import MemoryDB
 from ..backend.clients.test.cli import Test
 from ..backend.clients.weaviate_cloud.cli import Weaviate
 from ..backend.clients.zilliz_cloud.cli import ZillizAutoIndex
@@ -10,7 +12,10 @@ from ..backend.clients.aws_opensearch.cli import AWSOpenSearch
 from .cli import cli
 cli.add_command(PgVectorHNSW)
+cli.add_command(PgVectoRSHNSW)
+cli.add_command(PgVectoRSIVFFlat)
 cli.add_command(Redis)
+cli.add_command(MemoryDB)
 cli.add_command(Weaviate)
 cli.add_command(Test)
 cli.add_command(ZillizAutoIndex)

vectordb_bench/frontend/components/check_results/data.py CHANGED Viewed

@@ -24,7 +24,10 @@ def getFilterTasks(
         task
         for task in tasks
         if task.task_config.db_name in dbNames
-        and task.task_config.case_config.case_id.case_cls(task.task_config.case_config.custom_case).name in caseNames
+        and task.task_config.case_config.case_id.case_cls(
+            task.task_config.case_config.custom_case
+        ).name
+        in caseNames
     ]
     return filterTasks
@@ -35,17 +38,20 @@ def mergeTasks(tasks: list[CaseResult]):
         db_name = task.task_config.db_name
         db = task.task_config.db.value
         db_label = task.task_config.db_config.db_label or ""
-        case = task.task_config.case_config.case_id.case_cls(task.task_config.case_config.custom_case)
+        version = task.task_config.db_config.version or ""
+        case = task.task_config.case_config.case_id.case_cls(
+            task.task_config.case_config.custom_case
+        )
         dbCaseMetricsMap[db_name][case.name] = {
             "db": db,
             "db_label": db_label,
+            "version": version,
             "metrics": mergeMetrics(
                 dbCaseMetricsMap[db_name][case.name].get("metrics", {}),
                 asdict(task.metrics),
             ),
             "label": getBetterLabel(
-                dbCaseMetricsMap[db_name][case.name].get(
-                    "label", ResultLabel.FAILED),
+                dbCaseMetricsMap[db_name][case.name].get("label", ResultLabel.FAILED),
                 task.label,
             ),
         }
@@ -57,6 +63,7 @@ def mergeTasks(tasks: list[CaseResult]):
             metrics = metricInfo["metrics"]
             db = metricInfo["db"]
             db_label = metricInfo["db_label"]
+            version = metricInfo["version"]
             label = metricInfo["label"]
             if label == ResultLabel.NORMAL:
                 mergedTasks.append(
@@ -64,6 +71,7 @@ def mergeTasks(tasks: list[CaseResult]):
                         "db_name": db_name,
                         "db": db,
                         "db_label": db_label,
+                        "version": version,
                         "case_name": case_name,
                         "metricsSet": set(metrics.keys()),
                         **metrics,
@@ -79,8 +87,7 @@ def mergeMetrics(metrics_1: dict, metrics_2: dict) -> dict:
     metrics = {**metrics_1}
     for key, value in metrics_2.items():
         metrics[key] = (
-            getBetterMetric(
-                key, value, metrics[key]) if key in metrics else value
+            getBetterMetric(key, value, metrics[key]) if key in metrics else value
         )
     return metrics

vectordb_bench/frontend/components/run_test/caseSelector.py CHANGED Viewed

@@ -100,6 +100,16 @@ def caseConfigSetting(st, dbToCaseClusterConfigs, uiCaseItem: UICaseItem, active
                         value=config.inputConfig["value"],
                         help=config.inputHelp,
                     )
+                elif config.inputType == InputType.Float:
+                    caseConfig[config.label] = column.number_input(
+                        config.displayLabel if config.displayLabel else config.label.value,
+                        step=config.inputConfig.get("step", 0.1),
+                        min_value=config.inputConfig["min"],
+                        max_value=config.inputConfig["max"],
+                        key=key,
+                        value=config.inputConfig["value"],
+                        help=config.inputHelp,
+                    )
                 k += 1
         if k == 0:
             columns[1].write("Auto")

vectordb_bench/frontend/components/run_test/dbConfigSetting.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from pydantic import ValidationError
-from vectordb_bench.frontend.config.styles import *
+from vectordb_bench.backend.clients import DB
+from vectordb_bench.frontend.config.styles import DB_CONFIG_SETTING_COLUMNS
 from vectordb_bench.frontend.utils import inputIsPassword
-def dbConfigSettings(st, activedDbList):
+def dbConfigSettings(st, activedDbList: list[DB]):
     expander = st.expander("Configurations for the selected databases", True)
     dbConfigs = {}
@@ -27,7 +28,7 @@ def dbConfigSettings(st, activedDbList):
     return dbConfigs, isAllValid
-def dbConfigSettingItem(st, activeDb):
+def dbConfigSettingItem(st, activeDb: DB):
     st.markdown(
         f"<div style='font-weight: 600; font-size: 20px; margin-top: 16px;'>{activeDb.value}</div>",
         unsafe_allow_html=True,
@@ -36,20 +37,41 @@ def dbConfigSettingItem(st, activeDb):
     dbConfigClass = activeDb.config_cls
     properties = dbConfigClass.schema().get("properties")
-    propertiesItems = list(properties.items())
-    moveDBLabelToLast(propertiesItems)
     dbConfig = {}
-    for j, property in enumerate(propertiesItems):
-        column = columns[j % DB_CONFIG_SETTING_COLUMNS]
-        key, value = property
+    idx = 0
+    # db config (unique)
+    for key, property in properties.items():
+        if (
+            key not in dbConfigClass.common_short_configs()
+            and key not in dbConfigClass.common_long_configs()
+        ):
+            column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
+            idx += 1
+            dbConfig[key] = column.text_input(
+                key,
+                key="%s-%s" % (activeDb.name, key),
+                value=property.get("default", ""),
+                type="password" if inputIsPassword(key) else "default",
+            )
+    # db config (common short labels)
+    for key in dbConfigClass.common_short_configs():
+        column = columns[idx % DB_CONFIG_SETTING_COLUMNS]
+        idx += 1
         dbConfig[key] = column.text_input(
             key,
-            key="%s-%s" % (activeDb, key),
-            value=value.get("default", ""),
-            type="password" if inputIsPassword(key) else "default",
+            key="%s-%s" % (activeDb.name, key),
+            value="",
+            type="default",
+            placeholder="optional, for labeling results",
         )
-    return dbConfig
-def moveDBLabelToLast(propertiesItems):
-    propertiesItems.sort(key=lambda x: 1 if x[0] == "db_label" else 0)
+    # db config (common long text_input)
+    for key in dbConfigClass.common_long_configs():
+        dbConfig[key] = st.text_area(
+            key,
+            key="%s-%s" % (activeDb.name, key),
+            value="",
+            placeholder="optional",
+        )
+    return dbConfig

vectordb_bench/frontend/components/run_test/initStyle.py CHANGED Viewed

@@ -9,6 +9,8 @@ def initStyle(st):
             div[data-testid='stHorizontalBlock'] {gap: 8px;}
             /* check box */
             .stCheckbox p { color: #000; font-size: 18px; font-weight: 600; }
+            /* db selector - db_name should not wrap */
+            div[data-testid="stVerticalBlockBorderWrapper"] div[data-testid="stCheckbox"] div[data-testid="stWidgetLabel"] p { white-space: nowrap; }
         </style>""",
         unsafe_allow_html=True,
-    )
+    )

vectordb-bench 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl

vectordb-bench 0.0.12__py3-none-any.whl → 0.0.13__py3-none-any.whl