vectordb-bench 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. vectordb_bench/__main__.py +1 -1
  2. vectordb_bench/backend/clients/api.py +1 -1
  3. vectordb_bench/backend/clients/aws_opensearch/config.py +14 -0
  4. vectordb_bench/backend/clients/elastic_cloud/config.py +12 -0
  5. vectordb_bench/backend/clients/milvus/milvus.py +1 -1
  6. vectordb_bench/backend/clients/qdrant_cloud/config.py +14 -0
  7. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  8. vectordb_bench/backend/dataset.py +3 -0
  9. vectordb_bench/backend/task_runner.py +11 -0
  10. vectordb_bench/frontend/components/check_results/headerIcon.py +4 -2
  11. vectordb_bench/frontend/components/welcome/explainPrams.py +8 -8
  12. vectordb_bench/frontend/components/welcome/welcomePrams.py +1 -1
  13. vectordb_bench/frontend/config/dbCaseConfigs.py +3 -3
  14. vectordb_bench/frontend/config/styles.py +2 -2
  15. vectordb_bench/frontend/pages/results.py +5 -3
  16. vectordb_bench/interface.py +2 -1
  17. vectordb_bench/results/getLeaderboardDataV2.py +59 -0
  18. vectordb_bench/results/leaderboard_v2.json +2662 -0
  19. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/METADATA +10 -10
  20. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/RECORD +25 -23
  21. vectordb_bench/frontend/{vdb_benchmark.py → vdbbench.py} +0 -0
  22. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/WHEEL +0 -0
  23. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/entry_points.txt +0 -0
  24. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/licenses/LICENSE +0 -0
  25. {vectordb_bench-1.0.0.dist-info → vectordb_bench-1.0.1.dist-info}/top_level.txt +0 -0
vectordb_bench/__main__.py
@@ -17,7 +17,7 @@ def run_streamlit():
      cmd = [
          "streamlit",
          "run",
-         f"{pathlib.Path(__file__).parent}/frontend/vdb_benchmark.py",
+         f"{pathlib.Path(__file__).parent}/frontend/vdbbench.py",
          "--logger.level",
          "info",
          "--theme.base",
vectordb_bench/backend/clients/api.py
@@ -132,7 +132,7 @@ class VectorDB(ABC):
      """

      "The filtering types supported by the VectorDB Client, default only non-filter"
-     supported_filter_types: list[FilterOp] = [FilterOp.NonFilter, FilterOp.NumGE]
+     supported_filter_types: list[FilterOp] = [FilterOp.NonFilter]

      @classmethod
      def filter_supported(cls, filters: Filter) -> bool:
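Note: with the base default narrowed to non-filter only, a client that actually supports numeric filtering must opt back in by overriding the class attribute on its subclass, which `filter_supported` presumably consults. A minimal sketch, not from the package (the subclass name is hypothetical, and the import path is assumed):

    # Hypothetical sketch: a client re-declaring support for ">=" filters,
    # which the 1.0.1 base-class default no longer advertises for everyone.
    from vectordb_bench.backend.clients.api import VectorDB, FilterOp

    class MyFilterAwareClient(VectorDB):
        supported_filter_types: list[FilterOp] = [FilterOp.NonFilter, FilterOp.NumGE]
        # ...the usual VectorDB abstract methods would be implemented here.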
vectordb_bench/backend/clients/aws_opensearch/config.py
@@ -75,6 +75,20 @@ class AWSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
              and self.quantization_type == obj.quantization_type
          )

+     def __hash__(self) -> int:
+         return hash(
+             (
+                 self.engine,
+                 self.M,
+                 self.efConstruction,
+                 self.number_of_shards,
+                 self.number_of_replicas,
+                 self.number_of_segments,
+                 self.use_routing,
+                 self.quantization_type,
+             )
+         )
+
      def parse_metric(self) -> str:
          log.info(f"User specified metric_type: {self.metric_type_name}")
          self.metric_type = MetricType[self.metric_type_name.upper()]
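This `__hash__` (and the matching ones added to the Elastic Cloud, Qdrant, dataset, and task-runner classes below) follows a standard Python rule: a class that defines `__eq__` gets `__hash__` set to `None` unless it defines its own, making instances unhashable — and `CaseRunner.__hash__` below hashes these very config objects. A minimal sketch of the rule, with hypothetical class names:

    # Hypothetical sketch: defining __eq__ without __hash__ makes a class
    # unhashable; pairing them (over the same fields) restores set/dict use.
    class OnlyEq:
        def __init__(self, m: int):
            self.m = m

        def __eq__(self, other):
            return isinstance(other, OnlyEq) and self.m == other.m

    class EqAndHash(OnlyEq):
        def __hash__(self) -> int:
            return hash(self.m)  # hash exactly what __eq__ compares

    try:
        {OnlyEq(16)}  # TypeError: unhashable type: 'OnlyEq'
    except TypeError as err:
        print(err)

    print(len({EqAndHash(16), EqAndHash(16)}))  # 1 -- equal objects collapse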
vectordb_bench/backend/clients/elastic_cloud/config.py
@@ -48,6 +48,18 @@ class ElasticCloudIndexConfig(BaseModel, DBCaseConfig):
              and self.M == obj.M
          )

+     def __hash__(self) -> int:
+         return hash(
+             (
+                 self.index,
+                 self.number_of_shards,
+                 self.number_of_replicas,
+                 self.use_routing,
+                 self.efConstruction,
+                 self.M,
+             )
+         )
+
      def parse_metric(self) -> str:
          if self.metric_type == MetricType.L2:
              return "l2_norm"
vectordb_bench/backend/clients/milvus/milvus.py
@@ -29,7 +29,7 @@ class Milvus(VectorDB):
          dim: int,
          db_config: dict,
          db_case_config: MilvusIndexConfig,
-         collection_name: str = "VectorDBBenchCollection",
+         collection_name: str = "VDBBench",
          drop_old: bool = False,
          name: str = "Milvus",
          with_scalar_labels: bool = False,
vectordb_bench/backend/clients/qdrant_cloud/config.py
@@ -63,6 +63,20 @@ class QdrantIndexConfig(BaseModel, DBCaseConfig):
              and self.default_segment_number == obj.default_segment_number
          )

+     def __hash__(self) -> int:
+         return hash(
+             (
+                 self.m,
+                 self.payload_m,
+                 self.create_payload_int_index,
+                 self.create_payload_keyword_index,
+                 self.is_tenant,
+                 self.use_scalar_quant,
+                 self.sq_quantile,
+                 self.default_segment_number,
+             )
+         )
+
      def parse_metric(self) -> str:
          if self.metric_type == MetricType.L2:
              return "Euclid"
vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py
@@ -10,7 +10,7 @@ class ZillizCloud(Milvus):
          dim: int,
          db_config: dict,
          db_case_config: DBCaseConfig,
-         collection_name: str = "ZillizCloudVectorDBBench",
+         collection_name: str = "ZillizCloudVDBBench",
          drop_old: bool = False,
          name: str = "ZillizCloud",
          **kwargs,
vectordb_bench/backend/dataset.py
@@ -242,6 +242,9 @@ class DatasetManager(BaseModel):
              return self.data.name == obj.data.name and self.data.label == obj.data.label
          return False

+     def __hash__(self) -> int:
+         return hash((self.data.name, self.data.label))
+
      def set_reader(self, reader: DatasetReader):
          self.reader = reader

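With the hash keyed on the same `(data.name, data.label)` pair that `__eq__` compares, `DatasetManager` objects can serve directly as dict keys or set members. A hedged usage sketch (`runs` is a hypothetical iterable, not from the package):

    # Hypothetical usage enabled by the new __hash__: group results per
    # dataset by keying a dict on DatasetManager objects themselves.
    results_by_dataset: dict = {}
    for manager, result in runs:  # runs: hypothetical (manager, result) pairs
        results_by_dataset.setdefault(manager, []).append(result)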
vectordb_bench/backend/task_runner.py
@@ -59,6 +59,17 @@ class CaseRunner(BaseModel):
          )
          return False

+     def __hash__(self) -> int:
+         """Hash method to maintain consistency with __eq__ method."""
+         return hash(
+             (
+                 self.ca.label,
+                 self.config.db,
+                 self.config.db_case_config,
+                 self.ca.dataset,
+             )
+         )
+
      def display(self) -> dict:
          c_dict = self.ca.dict(
              include={
vectordb_bench/frontend/components/check_results/headerIcon.py
@@ -4,7 +4,7 @@ from vectordb_bench.frontend.config.styles import HEADER_ICON
  def drawHeaderIcon(st):
      st.markdown(
          f"""
-         <a href="/vdb_benchmark" target="_self">
+         <a href="/vdbbench" target="_self">
              <div class="headerIconContainer"></div>
          </a>

@@ -16,8 +16,10 @@ def drawHeaderIcon(st):
              width: 100%;
              border-bottom: 2px solid #E8EAEE;
              background-image: url({HEADER_ICON});
+             background-size: contain;
+             background-position: left top;
              background-repeat: no-repeat;
-             cursor: pointer;
+             cursor: pointer;
          }}
      </style>
      """,
vectordb_bench/frontend/components/welcome/explainPrams.py
@@ -1,16 +1,16 @@
  def explainPrams(st):
      st.markdown("## descriptions")
-     st.markdown("### 1.Overview")
+     st.markdown("### 1. Overview")
      st.markdown(
          """
- - **VectorDBBench** is an open-source benchmarking tool designed specifically for vector databases. Its main features include:
+ - **VectorDBBench(VDBBench)** is an open-source benchmarking tool designed specifically for vector databases. Its main features include:
      - (1) An easy-to-use **web UI** for configuration of tests and visual analysis of results.
      - (2) A comprehensive set of **standards for testing and metric collection**.
      - (3) Support for **various scenarios**, including additional support for **Filter** and **Streaming** based on standard tests.
- - VectorDBBench embraces open-source and welcome contributions of code and test result submissions. The testing process and extended scenarios of VectorDBBench, as well as the intention behind our design will be introduced as follows.
+ - VDBBench embraces open-source and welcome contributions of code and test result submissions. The testing process and extended scenarios of VDBBench, as well as the intention behind our design will be introduced as follows.
  """
      )
-     st.markdown("### 2.Dataset")
+     st.markdown("### 2. Dataset")
      st.markdown(
          """
  - We provide two embedding datasets:
@@ -19,7 +19,7 @@ def explainPrams(st):
  - (3)*OpenAI 1536dim*, generated using the **OpenAI** model based on the [C4 corpus](https://huggingface.co/datasets/legacy-datasets/c4).
  """
      )
-     st.markdown("### 3.Standard Test")
+     st.markdown("### 3. Standard Test")
      st.markdown(
          """
  The test is actually divided into 3 sub-processes
@@ -42,19 +42,19 @@ The test is actually divided into 3 sub-processes
  """,
          unsafe_allow_html=True,
      )
-     st.markdown("### 4.Filter Search Test")
+     st.markdown("### 4. Filter Search Test")
      st.markdown(
          """
  - Compared to the Standard Test, the **Filter Search** introduces additional scalar constraints (e.g. **color == red**) during the Search Test. Different **filter_ratios** present varying levels of challenge to the VectorDB's search performance.
  - We provide an additional **string column** containing 10 labels with different distribution ratios (50%,20%,10%,5%,2%,1%,0.5%,0.2%,0.1%). For each label, we conduct both a **Serial Test** and a **Concurrency Test** to observe the VectorDB's performance in terms of **QPS, latency, and recall** under different filtering conditions.
  """
      )
-     st.markdown("### 5.Streaming Search Test")
+     st.markdown("### 5. Streaming Search Test")
      st.markdown(
          """
  Different from Standard's load and search separation, Streaming Search Test primarily focuses on **search performance during the insertion process**.
  Different **base dataset sizes** and varying **insertion rates** set distinct challenges to the VectorDB's search capabilities.
- VectorDBBench will send insert requests at a **fixed rate**, maintaining consistent insertion pressure. The search test consists of three steps as follows:
+ VDBBench will send insert requests at a **fixed rate**, maintaining consistent insertion pressure. The search test consists of three steps as follows:
  - 1.**Streaming Search**
      - Users can configure **multiple search stages**. When the inserted data volume reaches a specified stage, a **Serial Test** and a **Concurrent Test** will be conducted, recording qps, latency, and recall performance.
  - 2.**Streaming Final Search**
vectordb_bench/frontend/components/welcome/welcomePrams.py
@@ -21,7 +21,7 @@ def get_image_as_base64(image_path):


  def welcomePrams(st):
-     st.title("Welcome to VectorDB Benchmark!")
+     st.title("Welcome to VDBBench!")
      options = [
          {
              "title": "Standard Test Results",
vectordb_bench/frontend/config/dbCaseConfigs.py
@@ -520,7 +520,7 @@ CaseConfigParamInput_M = CaseConfigInput(
      inputConfig={
          "min": 4,
          "max": 64,
-         "value": 30,
+         "value": 16,
      },
      isDisplayed=lambda config: config.get(CaseConfigParamType.IndexType, None)
      in [
@@ -550,7 +550,7 @@ CaseConfigParamInput_EFConstruction_Milvus = CaseConfigInput(
      inputConfig={
          "min": 8,
          "max": 512,
-         "value": 360,
+         "value": 256,
      },
      isDisplayed=lambda config: config[CaseConfigParamType.IndexType]
      in [
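Both UI defaults move to values HNSW implementations commonly ship with: `M` (per-node graph out-degree) drops from 30 to 16, and `efConstruction` (build-time candidate-list width) from 360 to 256, trading a little recall headroom for faster, lighter index builds. A hedged sketch of how such values typically feed a Milvus-style HNSW index definition (conventional Milvus parameter names; not code from this diff):

    # Hedged sketch: the new UI defaults expressed as Milvus-style HNSW params.
    index_params = {
        "index_type": "HNSW",
        "metric_type": "COSINE",
        "params": {
            "M": 16,                # new default: max out-degree per node
            "efConstruction": 256,  # new default: build-time search width
        },
    }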
@@ -1441,7 +1441,7 @@ CaseConfigParamInput_Milvus_use_partition_key = CaseConfigInput(
      label=CaseConfigParamType.use_partition_key,
      inputType=InputType.Option,
      inputHelp="whether to use partition_key for label-filter cases. only works in label-filter cases",
-     inputConfig={"options": [True, False]},
+     inputConfig={"options": [False, True]},
  )

vectordb_bench/frontend/config/styles.py
@@ -33,8 +33,8 @@ MAX_AUTO_REFRESH_COUNT = 999999
  MAX_AUTO_REFRESH_INTERVAL = 5000 # 5s

  PAGE_TITLE = "VectorDB Benchmark"
- FAVICON = "https://assets.zilliz.com/favicon_f7f922fe27.png"
- HEADER_ICON = "https://assets.zilliz.com/vdb_benchmark_db790b5387.png"
+ FAVICON = "https://assets.zilliz.com/VDB_Bench_icon_d3276bedc4.png"
+ HEADER_ICON = "https://assets.zilliz.com/VDB_Bench_text_icon_6c5f52a458.png"

  # RedisCloud icon: https://assets.zilliz.com/Redis_Cloud_74b8bfef39.png
  # Elasticsearch icon: https://assets.zilliz.com/elasticsearch_beffeadc29.png
vectordb_bench/frontend/pages/results.py
@@ -30,10 +30,12 @@ def main():

      st.title("Vector Database Benchmark")
      st.caption(
-         "Except for zillizcloud-v2024.1, which was tested in _January 2024_, all other tests were completed before _August 2023_."
+         "Choose your desired test results to display from the sidebar. "
+         "For your reference, we've included two standard benchmarks tested by our team. "
+         "Note that `standard_2025` was tested in 2025; the others in 2023. "
+         "Unless explicitly labeled as distributed multi-node, test with single-node mode by default."
      )
-     st.caption("All tested milvus are in _standalone_ mode.")
-
+     st.caption("We welcome community contributions for better results, parameter configurations, and optimizations.")
      # results selector and filter
      resultSelectorContainer = st.sidebar.container()
      shownData, failedTasks, showCaseNames = getshownData(resultSelectorContainer, allResults)
vectordb_bench/interface.py
@@ -95,7 +95,8 @@ class BenchMarkRunner:

          return self._run_async(send_conn)

-     def get_results(self, result_dir: pathlib.Path | None = None) -> list[TestResult]:
+     @staticmethod
+     def get_results(result_dir: pathlib.Path | None = None) -> list[TestResult]:
          """results of all runs, each TestResult represents one run."""
          target_dir = result_dir if result_dir else config.RESULTS_LOCAL_DIR
          return ResultCollector.collect(target_dir)
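With `get_results` now a `@staticmethod`, callers can read results without constructing a runner — which is exactly how the new leaderboard script below consumes it:

    # No BenchMarkRunner instance needed; result_dir defaults to
    # config.RESULTS_LOCAL_DIR when omitted.
    from vectordb_bench.interface import BenchMarkRunner

    results = BenchMarkRunner.get_results()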
vectordb_bench/results/getLeaderboardDataV2.py (new file)
@@ -0,0 +1,59 @@
+ import json
+ import logging
+
+
+ from vectordb_bench.backend.cases import CaseType
+ from vectordb_bench.backend.clients import DB
+ from vectordb_bench.models import CaseResult
+ from vectordb_bench import config
+
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
+
+ from vectordb_bench.interface import BenchMarkRunner
+
+
+ def get_standard_2025_results() -> list[CaseResult]:
+     all_results = BenchMarkRunner.get_results()
+     standard_2025_case_results = []
+     for result in all_results:
+         if result.task_label == "standard_2025":
+             standard_2025_case_results += result.results
+     return standard_2025_case_results
+
+
+ def save_to_json(data: list[dict], file_name: str):
+     with open(file_name, "w") as f:
+         json.dump(data, f, indent=4)
+
+
+ def main():
+     standard_2025_case_results = get_standard_2025_results()
+     data = []
+     for case_result in standard_2025_case_results:
+         db = case_result.task_config.db
+         label = case_result.task_config.db_config.db_label
+         metrics = case_result.metrics
+         qps = metrics.qps
+         latency = metrics.serial_latency_p99
+         recall = metrics.recall
+         case = case_result.task_config.case_config.case
+         filter_ratio = case.filters.filter_rate
+         dataset = case.dataset.data.full_name
+         if case.case_id != CaseType.StreamingPerformanceCase:
+             data.append(
+                 {
+                     "dataset": dataset,
+                     "db": db.value,
+                     "label": label,
+                     "db_name": f"{db.value}-{label}",
+                     "qps": round(qps, 4),
+                     "latency": round(latency, 4),
+                     "recall": round(recall, 4),
+                     "filter_ratio": round(filter_ratio, 2),
+                 }
+             )
+     save_to_json(data, config.RESULTS_LOCAL_DIR / "leaderboard_v2.json")
+
+
+ if __name__ == "__main__":
+     main()
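A hedged usage note: assuming `vectordb_bench.results` is importable as a package, the script can be invoked directly to rebuild the bundled `leaderboard_v2.json` (file 18 above) from whatever local results carry the `standard_2025` task label:

    # Hedged sketch: regenerate leaderboard_v2.json from local results.
    # Assumes "standard_2025"-labeled results exist under RESULTS_LOCAL_DIR.
    from vectordb_bench.results.getLeaderboardDataV2 import main

    main()  # writes config.RESULTS_LOCAL_DIR / "leaderboard_v2.json"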