PyPI - vectordb-bench - Versions diffs - 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl - Mend

vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

vectordb_bench/__init__.py +49 -24
vectordb_bench/__main__.py +4 -3
vectordb_bench/backend/assembler.py +12 -13
vectordb_bench/backend/cases.py +56 -46
vectordb_bench/backend/clients/__init__.py +101 -14
vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
vectordb_bench/backend/clients/alloydb/cli.py +52 -35
vectordb_bench/backend/clients/alloydb/config.py +30 -30
vectordb_bench/backend/clients/api.py +8 -9
vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
vectordb_bench/backend/clients/chroma/chroma.py +38 -36
vectordb_bench/backend/clients/chroma/config.py +4 -2
vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
vectordb_bench/backend/clients/memorydb/cli.py +8 -8
vectordb_bench/backend/clients/memorydb/config.py +2 -2
vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
vectordb_bench/backend/clients/milvus/cli.py +62 -80
vectordb_bench/backend/clients/milvus/config.py +31 -7
vectordb_bench/backend/clients/milvus/milvus.py +23 -26
vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
vectordb_bench/backend/clients/pgvector/cli.py +40 -31
vectordb_bench/backend/clients/pgvector/config.py +63 -73
vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
vectordb_bench/backend/clients/pinecone/config.py +1 -0
vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
vectordb_bench/backend/clients/redis/cli.py +6 -12
vectordb_bench/backend/clients/redis/config.py +7 -5
vectordb_bench/backend/clients/redis/redis.py +94 -58
vectordb_bench/backend/clients/test/cli.py +1 -2
vectordb_bench/backend/clients/test/config.py +2 -2
vectordb_bench/backend/clients/test/test.py +4 -5
vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
vectordb_bench/backend/data_source.py +30 -18
vectordb_bench/backend/dataset.py +47 -27
vectordb_bench/backend/result_collector.py +2 -3
vectordb_bench/backend/runner/__init__.py +4 -6
vectordb_bench/backend/runner/mp_runner.py +85 -34
vectordb_bench/backend/runner/rate_runner.py +51 -23
vectordb_bench/backend/runner/read_write_runner.py +140 -46
vectordb_bench/backend/runner/serial_runner.py +99 -50
vectordb_bench/backend/runner/util.py +4 -19
vectordb_bench/backend/task_runner.py +95 -74
vectordb_bench/backend/utils.py +17 -9
vectordb_bench/base.py +0 -1
vectordb_bench/cli/cli.py +65 -60
vectordb_bench/cli/vectordbbench.py +6 -7
vectordb_bench/frontend/components/check_results/charts.py +8 -19
vectordb_bench/frontend/components/check_results/data.py +4 -16
vectordb_bench/frontend/components/check_results/filters.py +8 -16
vectordb_bench/frontend/components/check_results/nav.py +4 -4
vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
vectordb_bench/frontend/components/concurrent/charts.py +12 -12
vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
vectordb_bench/frontend/components/custom/initStyle.py +1 -1
vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
vectordb_bench/frontend/components/tables/data.py +3 -6
vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
vectordb_bench/frontend/pages/concurrent.py +3 -5
vectordb_bench/frontend/pages/custom.py +30 -9
vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
vectordb_bench/frontend/pages/run_test.py +3 -7
vectordb_bench/frontend/utils.py +1 -1
vectordb_bench/frontend/vdb_benchmark.py +4 -6
vectordb_bench/interface.py +56 -26
vectordb_bench/log_util.py +59 -64
vectordb_bench/metric.py +10 -11
vectordb_bench/models.py +26 -43
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
vectordb_bench-0.0.20.dist-info/RECORD +135 -0
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
vectordb_bench-0.0.18.dist-info/RECORD +0 -131
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
{vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0

vectordb_bench/__init__.py CHANGED Viewed

@@ -22,46 +22,71 @@ class config:
     DROP_OLD = env.bool("DROP_OLD", True)
     USE_SHUFFLED_DATA = env.bool("USE_SHUFFLED_DATA", True)
-    NUM_CONCURRENCY = env.list("NUM_CONCURRENCY",  [1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100], subcast=int )
+    NUM_CONCURRENCY = env.list(
+        "NUM_CONCURRENCY",
+        [
+            1,
+            5,
+            10,
+            15,
+            20,
+            25,
+            30,
+            35,
+            40,
+            45,
+            50,
+            55,
+            60,
+            65,
+            70,
+            75,
+            80,
+            85,
+            90,
+            95,
+            100,
+        ],
+        subcast=int,
+    )
     CONCURRENCY_DURATION = 30
     RESULTS_LOCAL_DIR = env.path(
-        "RESULTS_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("results")
+        "RESULTS_LOCAL_DIR",
+        pathlib.Path(__file__).parent.joinpath("results"),
     )
     CONFIG_LOCAL_DIR = env.path(
-        "CONFIG_LOCAL_DIR", pathlib.Path(__file__).parent.joinpath("config-files")
+        "CONFIG_LOCAL_DIR",
+        pathlib.Path(__file__).parent.joinpath("config-files"),
     )
     K_DEFAULT = 100  # default return top k nearest neighbors during search
     CUSTOM_CONFIG_DIR = pathlib.Path(__file__).parent.joinpath("custom/custom_case.json")
-    CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600   # 24h
-    LOAD_TIMEOUT_DEFAULT        = 24 * 3600   # 24h
-    LOAD_TIMEOUT_768D_1M        = 24 * 3600   # 24h
-    LOAD_TIMEOUT_768D_10M       = 240 * 3600  # 10d
-    LOAD_TIMEOUT_768D_100M      = 2400 * 3600 # 100d
+    CAPACITY_TIMEOUT_IN_SECONDS = 24 * 3600  # 24h
+    LOAD_TIMEOUT_DEFAULT = 24 * 3600  # 24h
+    LOAD_TIMEOUT_768D_1M = 24 * 3600  # 24h
+    LOAD_TIMEOUT_768D_10M = 240 * 3600  # 10d
+    LOAD_TIMEOUT_768D_100M = 2400 * 3600  # 100d
-    LOAD_TIMEOUT_1536D_500K     = 24 * 3600   # 24h
-    LOAD_TIMEOUT_1536D_5M       = 240 * 3600  # 10d
+    LOAD_TIMEOUT_1536D_500K = 24 * 3600  # 24h
+    LOAD_TIMEOUT_1536D_5M = 240 * 3600  # 10d
-    OPTIMIZE_TIMEOUT_DEFAULT    = 24 * 3600   # 24h
-    OPTIMIZE_TIMEOUT_768D_1M    = 24 * 3600   # 24h
-    OPTIMIZE_TIMEOUT_768D_10M   = 240 * 3600  # 10d
-    OPTIMIZE_TIMEOUT_768D_100M  = 2400 * 3600 # 100d
+    OPTIMIZE_TIMEOUT_DEFAULT = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_768D_1M = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_768D_10M = 240 * 3600  # 10d
+    OPTIMIZE_TIMEOUT_768D_100M = 2400 * 3600  # 100d
+    OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600  # 24h
+    OPTIMIZE_TIMEOUT_1536D_5M = 240 * 3600  # 10d
-    OPTIMIZE_TIMEOUT_1536D_500K = 24 * 3600   # 24h
-    OPTIMIZE_TIMEOUT_1536D_5M   = 240 * 3600  # 10d
     def display(self) -> str:
-        tmp = [
-            i for i in inspect.getmembers(self)
-            if not inspect.ismethod(i[1])
-            and not i[0].startswith('_')
-            and "TIMEOUT" not in i[0]
+        return [
+            i
+            for i in inspect.getmembers(self)
+            if not inspect.ismethod(i[1]) and not i[0].startswith("_") and "TIMEOUT" not in i[0]
         ]
-        return tmp
 log_util.init(config.LOG_LEVEL)

vectordb_bench/__main__.py CHANGED Viewed

@@ -1,7 +1,8 @@
-import traceback
 import logging
+import pathlib
 import subprocess
-import os
+import traceback
 from . import config
 log = logging.getLogger("vectordb_bench")
@@ -16,7 +17,7 @@ def run_streamlit():
     cmd = [
         "streamlit",
         "run",
-        f"{os.path.dirname(__file__)}/frontend/vdb_benchmark.py",
+        f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
         "--logger.level",
         "info",
         "--theme.base",

vectordb_bench/backend/assembler.py CHANGED Viewed

@@ -1,24 +1,25 @@
-from .cases import CaseLabel
-from .task_runner import CaseRunner, RunningStatus, TaskRunner
-from ..models import TaskConfig
-from ..backend.clients import EmptyDBCaseConfig
-from ..backend.data_source  import DatasetSource
 import logging
+from vectordb_bench.backend.clients import EmptyDBCaseConfig
+from vectordb_bench.backend.data_source import DatasetSource
+from vectordb_bench.models import TaskConfig
+from .cases import CaseLabel
+from .task_runner import CaseRunner, RunningStatus, TaskRunner
 log = logging.getLogger(__name__)
 class Assembler:
     @classmethod
-    def assemble(cls, run_id , task: TaskConfig, source: DatasetSource) -> CaseRunner:
+    def assemble(cls, run_id: str, task: TaskConfig, source: DatasetSource) -> CaseRunner:
         c_cls = task.case_config.case_id.case_cls
         c = c_cls(task.case_config.custom_case)
-        if type(task.db_case_config) != EmptyDBCaseConfig:
+        if type(task.db_case_config) is not EmptyDBCaseConfig:
             task.db_case_config.metric_type = c.dataset.data.metric_type
-        runner = CaseRunner(
+        return CaseRunner(
             run_id=run_id,
             config=task,
             ca=c,
@@ -26,8 +27,6 @@ class Assembler:
             dataset_source=source,
         )
-        return runner
     @classmethod
     def assemble_all(
         cls,
@@ -50,12 +49,12 @@ class Assembler:
             db2runner[db].append(r)
         # check dbclient installed
-        for k in db2runner.keys():
+        for k in db2runner:
             _ = k.init_cls
         # sort by dataset size
-        for k in db2runner.keys():
-            db2runner[k].sort(key=lambda x:x.ca.dataset.data.size)
+        for k, _ in db2runner:
+            db2runner[k].sort(key=lambda x: x.ca.dataset.data.size)
         all_runners = []
         all_runners.extend(load_runners)

vectordb_bench/backend/cases.py CHANGED Viewed

@@ -1,7 +1,5 @@
-import typing
 import logging
 from enum import Enum, auto
-from typing import Type
 from vectordb_bench import config
 from vectordb_bench.backend.clients.api import MetricType
@@ -12,7 +10,6 @@ from vectordb_bench.frontend.components.custom.getCustomConfig import (
 from .dataset import CustomDataset, Dataset, DatasetManager
 log = logging.getLogger(__name__)
@@ -50,11 +47,10 @@ class CaseType(Enum):
     Custom = 100
     PerformanceCustomDataset = 101
-    def case_cls(self, custom_configs: dict | None = None) -> Type["Case"]:
+    def case_cls(self, custom_configs: dict | None = None) -> type["Case"]:
         if custom_configs is None:
             return type2case.get(self)()
-        else:
-            return type2case.get(self)(**custom_configs)
+        return type2case.get(self)(**custom_configs)
     def case_name(self, custom_configs: dict | None = None) -> str:
         c = self.case_cls(custom_configs)
@@ -99,10 +95,10 @@ class Case(BaseModel):
     @property
     def filters(self) -> dict | None:
         if self.filter_rate is not None:
-            ID = round(self.filter_rate * self.dataset.data.size)
+            target_id = round(self.filter_rate * self.dataset.data.size)
             return {
-                "metadata": f">={ID}",
-                "id": ID,
+                "metadata": f">={target_id}",
+                "id": target_id,
             }
         return None
@@ -126,8 +122,8 @@ class CapacityDim960(CapacityCase):
     case_id: CaseType = CaseType.CapacityDim960
     dataset: DatasetManager = Dataset.GIST.manager(100_000)
     name: str = "Capacity Test (960 Dim Repeated)"
-    description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
-     vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
+    description: str = """This case tests the vector database's loading capacity by repeatedly inserting large-dimension
+     vectors (GIST 100K vectors, <b>960 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
      reported."""
@@ -136,7 +132,7 @@ class CapacityDim128(CapacityCase):
     dataset: DatasetManager = Dataset.SIFT.manager(500_000)
     name: str = "Capacity Test (128 Dim Repeated)"
     description: str = """This case tests the vector database's loading capacity by repeatedly inserting small-dimension
-     vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
+     vectors (SIFT 100K vectors, <b>128 dimensions</b>) until it is fully loaded. Number of inserted vectors will be
      reported."""
@@ -144,8 +140,9 @@ class Performance768D10M(PerformanceCase):
     case_id: CaseType = CaseType.Performance768D10M
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Search Performance Test (10M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) at varying parallel levels.
+    Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
@@ -154,8 +151,9 @@ class Performance768D1M(PerformanceCase):
     case_id: CaseType = CaseType.Performance768D1M
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Search Performance Test (1M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) at varying parallel levels.
+    Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
@@ -165,8 +163,9 @@ class Performance768D10M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel
+    levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
@@ -176,8 +175,9 @@ class Performance768D1M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
@@ -187,8 +187,9 @@ class Performance768D10M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.COHERE.manager(10_000_000)
     name: str = "Filtering Search Performance Test (10M Dataset, 768 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>Cohere 10M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_10M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_10M
@@ -198,8 +199,9 @@ class Performance768D1M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.COHERE.manager(1_000_000)
     name: str = "Filtering Search Performance Test (1M Dataset, 768 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>Cohere 1M vectors</b>, 768 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_1M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_1M
@@ -209,8 +211,9 @@ class Performance768D100M(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.LAION.manager(100_000_000)
     name: str = "Search Performance Test (100M Dataset, 768 Dim)"
-    description: str = """This case tests the search performance of a vector database with a large 100M dataset (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large 100M dataset
+    (<b>LAION 100M vectors</b>, 768 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_768D_100M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_768D_100M
@@ -220,8 +223,9 @@ class Performance1536D500K(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Search Performance Test (500K Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 500K dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium 500K dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
@@ -231,8 +235,9 @@ class Performance1536D5M(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Search Performance Test (5M Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 5M dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium 5M dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
@@ -242,8 +247,9 @@ class Performance1536D500K1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
@@ -253,8 +259,9 @@ class Performance1536D5M1P(PerformanceCase):
     filter_rate: float | int | None = 0.01
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 1%)"
-    description: str = """This case tests the search performance of a vector database with a large dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a large dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a low filtering rate (<b>1% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
@@ -264,8 +271,9 @@ class Performance1536D500K99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.OPENAI.manager(500_000)
     name: str = "Filtering Search Performance Test (500K Dataset, 1536 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>OpenAI 500K vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_500K
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_500K
@@ -275,8 +283,9 @@ class Performance1536D5M99P(PerformanceCase):
     filter_rate: float | int | None = 0.99
     dataset: DatasetManager = Dataset.OPENAI.manager(5_000_000)
     name: str = "Filtering Search Performance Test (5M Dataset, 1536 Dim, Filter 99%)"
-    description: str = """This case tests the search performance of a vector database with a medium dataset (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium dataset
+    (<b>OpenAI 5M vectors</b>, 1536 dimensions) under a high filtering rate (<b>99% vectors</b>),
+    at varying parallel levels. Results will show index building time, recall, and maximum QPS."""
     load_timeout: float | int = config.LOAD_TIMEOUT_1536D_5M
     optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_1536D_5M
@@ -286,10 +295,11 @@ class Performance1536D50K(PerformanceCase):
     filter_rate: float | int | None = None
     dataset: DatasetManager = Dataset.OPENAI.manager(50_000)
     name: str = "Search Performance Test (50K Dataset, 1536 Dim)"
-    description: str = """This case tests the search performance of a vector database with a medium 50K dataset (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels.
-Results will show index building time, recall, and maximum QPS."""
+    description: str = """This case tests the search performance of a vector database with a medium 50K dataset
+    (<b>OpenAI 50K vectors</b>, 1536 dimensions), at varying parallel levels. Results will show index building time,
+    recall, and maximum QPS."""
     load_timeout: float | int = 3600
-    optimize_timeout: float | int | None = 15 * 60
+    optimize_timeout: float | int | None = config.OPTIMIZE_TIMEOUT_DEFAULT
 def metric_type_map(s: str) -> MetricType:
@@ -312,11 +322,11 @@ class PerformanceCustomDataset(PerformanceCase):
     def __init__(
         self,
-        name,
-        description,
-        load_timeout,
-        optimize_timeout,
-        dataset_config,
+        name: str,
+        description: str,
+        load_timeout: float,
+        optimize_timeout: float,
+        dataset_config: dict,
         **kwargs,
     ):
         dataset_config = CustomDatasetConfig(**dataset_config)

vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl