vectordb-bench 0.0.20__py3-none-any.whl → 0.0.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/backend/assembler.py +2 -2
- vectordb_bench/backend/clients/__init__.py +28 -2
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +1 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +1 -4
- vectordb_bench/backend/clients/api.py +8 -15
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +54 -8
- vectordb_bench/backend/clients/aws_opensearch/cli.py +85 -1
- vectordb_bench/backend/clients/aws_opensearch/config.py +10 -0
- vectordb_bench/backend/clients/chroma/chroma.py +1 -4
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +1 -4
- vectordb_bench/backend/clients/memorydb/cli.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +2 -5
- vectordb_bench/backend/clients/milvus/milvus.py +1 -20
- vectordb_bench/backend/clients/mongodb/config.py +53 -0
- vectordb_bench/backend/clients/mongodb/mongodb.py +200 -0
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +1 -4
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +3 -11
- vectordb_bench/backend/clients/pgvector/pgvector.py +2 -7
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +2 -7
- vectordb_bench/backend/clients/pinecone/pinecone.py +1 -4
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +3 -6
- vectordb_bench/backend/clients/redis/redis.py +1 -4
- vectordb_bench/backend/clients/test/cli.py +1 -1
- vectordb_bench/backend/clients/test/test.py +1 -4
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +1 -4
- vectordb_bench/backend/data_source.py +4 -12
- vectordb_bench/backend/runner/mp_runner.py +16 -34
- vectordb_bench/backend/runner/rate_runner.py +4 -4
- vectordb_bench/backend/runner/read_write_runner.py +11 -15
- vectordb_bench/backend/runner/serial_runner.py +20 -28
- vectordb_bench/backend/task_runner.py +6 -26
- vectordb_bench/frontend/components/custom/displaypPrams.py +12 -1
- vectordb_bench/frontend/components/run_test/submitTask.py +20 -3
- vectordb_bench/frontend/config/dbCaseConfigs.py +32 -0
- vectordb_bench/interface.py +10 -19
- vectordb_bench/log_util.py +15 -2
- vectordb_bench/models.py +4 -0
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/METADATA +55 -2
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/RECORD +43 -41
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/WHEEL +0 -0
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/top_level.txt +0 -0
vectordb_bench/backend/runner/serial_runner.py
CHANGED
@@ -40,9 +40,7 @@ class SerialInsertRunner:
     def task(self) -> int:
         count = 0
         with self.db.init():
-            log.info(
-                f"({mp.current_process().name:16}) Start inserting embeddings in batch {config.NUM_PER_BATCH}",
-            )
+            log.info(f"({mp.current_process().name:16}) Start inserting embeddings in batch {config.NUM_PER_BATCH}")
             start = time.perf_counter()
             for data_df in self.dataset:
                 all_metadata = data_df["id"].tolist()
@@ -66,13 +64,11 @@ class SerialInsertRunner:
                 assert insert_count == len(all_metadata)
                 count += insert_count
                 if count % 100_000 == 0:
-                    log.info(
-                        f"({mp.current_process().name:16}) Loaded {count} embeddings into VectorDB",
-                    )
+                    log.info(f"({mp.current_process().name:16}) Loaded {count} embeddings into VectorDB")
 
             log.info(
-                f"({mp.current_process().name:16}) Finish loading all dataset into VectorDB, "
-                f"dur={time.perf_counter()-start}"
+                f"({mp.current_process().name:16}) Finish loading all dataset into VectorDB, "
+                f"dur={time.perf_counter() - start}"
             )
             return count
 
@@ -83,8 +79,8 @@ class SerialInsertRunner:
 
         num_batches = math.ceil(len(all_embeddings) / NUM_PER_BATCH)
         log.info(
-            f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} "
-            f"embeddings in batch {NUM_PER_BATCH}"
+            f"({mp.current_process().name:16}) Start inserting {len(all_embeddings)} "
+            f"embeddings in batch {NUM_PER_BATCH}"
         )
         count = 0
         for batch_id in range(num_batches):
@@ -94,8 +90,8 @@ class SerialInsertRunner:
             embeddings = all_embeddings[batch_id * NUM_PER_BATCH : (batch_id + 1) * NUM_PER_BATCH]
 
             log.debug(
-                f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
-                f"Start inserting {len(metadata)} embeddings"
+                f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
+                f"Start inserting {len(metadata)} embeddings"
             )
             while retry_count < LOAD_MAX_TRY_COUNT:
                 insert_count, error = self.db.insert_embeddings(
@@ -113,15 +109,15 @@ class SerialInsertRunner:
                 else:
                     break
             log.debug(
-                f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
-                f"Finish inserting {len(metadata)} embeddings"
+                f"({mp.current_process().name:16}) batch [{batch_id:3}/{num_batches}], "
+                f"Finish inserting {len(metadata)} embeddings"
             )
 
             assert already_insert_count == len(metadata)
             count += already_insert_count
         log.info(
-            f"({mp.current_process().name:16}) Finish inserting {len(all_embeddings)} embeddings in "
-            f"batch {NUM_PER_BATCH}"
+            f"({mp.current_process().name:16}) Finish inserting {len(all_embeddings)} embeddings in "
+            f"batch {NUM_PER_BATCH}"
         )
         return count
 
@@ -160,8 +156,6 @@ class SerialInsertRunner:
         start_time = time.perf_counter()
         max_load_count, times = 0, 0
         try:
-            with self.db.init():
-                self.db.ready_to_load()
             while time.perf_counter() - start_time < self.timeout:
                 count = self.endless_insert_data(
                     all_embeddings,
@@ -171,13 +165,13 @@ class SerialInsertRunner:
                 max_load_count += count
                 times += 1
                 log.info(
-                    f"Loaded {times} entire dataset, current max load counts={utils.numerize(max_load_count)}, "
-                    f"{max_load_count}"
+                    f"Loaded {times} entire dataset, current max load counts={utils.numerize(max_load_count)}, "
+                    f"{max_load_count}"
                 )
         except Exception as e:
             log.info(
-                f"Capacity case load reach limit, insertion counts={utils.numerize(max_load_count)}, "
-                f"{max_load_count}, err={e}"
+                f"Capacity case load reach limit, insertion counts={utils.numerize(max_load_count)}, "
+                f"{max_load_count}, err={e}"
             )
             traceback.print_exc()
             return max_load_count
@@ -209,9 +203,7 @@ class SerialSearchRunner:
         self.ground_truth = ground_truth
 
     def search(self, args: tuple[list, pd.DataFrame]) -> tuple[float, float, float]:
-        log.info(
-            f"{mp.current_process().name:14} start search the entire test_data to get recall and latency",
-        )
+        log.info(f"{mp.current_process().name:14} start search the entire test_data to get recall and latency")
         with self.db.init():
             test_data, ground_truth = args
             ideal_dcg = get_ideal_dcg(self.k)
@@ -242,8 +234,8 @@ class SerialSearchRunner:
 
                 if len(latencies) % 100 == 0:
                     log.debug(
-                        f"({mp.current_process().name:14}) search_count={len(latencies):3}, "
-                        f"latest_latency={latencies[-1]}, latest recall={recalls[-1]}"
+                        f"({mp.current_process().name:14}) search_count={len(latencies):3}, "
+                        f"latest_latency={latencies[-1]}, latest recall={recalls[-1]}"
                     )
 
         avg_latency = round(np.mean(latencies), 4)
@@ -258,7 +250,7 @@ class SerialSearchRunner:
             f"avg_recall={avg_recall}, "
             f"avg_ndcg={avg_ndcg},"
            f"avg_latency={avg_latency}, "
-            f"p99={p99}"
+            f"p99={p99}"
         )
         return (avg_recall, avg_ndcg, p99)
 
vectordb_bench/backend/task_runner.py
CHANGED
@@ -98,9 +98,7 @@ class CaseRunner(BaseModel):
             self.init_db(drop_old)
             self.ca.dataset.prepare(self.dataset_source, filters=self.ca.filter_rate)
         except ModuleNotFoundError as e:
-            log.warning(
-                f"pre run case error: please install client for db: {self.config.db}, error={e}",
-            )
+            log.warning(f"pre run case error: please install client for db: {self.config.db}, error={e}")
             raise e from None
 
     def run(self, drop_old: bool = True) -> Metric:
@@ -136,9 +134,7 @@ class CaseRunner(BaseModel):
             log.warning(f"Failed to run capacity case, reason = {e}")
             raise e from None
         else:
-            log.info(
-                f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}",
-            )
+            log.info(f"Capacity case loading dataset reaches VectorDB's limit: max capacity = {count}")
             return Metric(max_load_count=count)
 
     def _run_perf_case(self, drop_old: bool = True) -> Metric:
@@ -147,22 +143,6 @@ class CaseRunner(BaseModel):
         Returns:
             Metric: load_duration, recall, serial_latency_p99, and, qps
         """
-        """
-        if drop_old:
-            _, load_dur = self._load_train_data()
-            build_dur = self._optimize()
-            m.load_duration = round(load_dur+build_dur, 4)
-            log.info(
-                f"Finish loading the entire dataset into VectorDB,"
-                f" insert_duration={load_dur}, optimize_duration={build_dur}"
-                f" load_duration(insert + optimize) = {m.load_duration}"
-            )
-
-            self._init_search_runner()
-
-            m.qps, m.conc_num_list, m.conc_qps_list, m.conc_latency_p99_list = self._conc_search()
-            m.recall, m.serial_latency_p99 = self._serial_search()
-        """
 
         log.info("Start performance case")
         try:
@@ -175,7 +155,7 @@ class CaseRunner(BaseModel):
                 log.info(
                     f"Finish loading the entire dataset into VectorDB,"
                     f" insert_duration={load_dur}, optimize_duration={build_dur}"
-                    f" load_duration(insert + optimize) = {m.load_duration}"
+                    f" load_duration(insert + optimize) = {m.load_duration}"
                 )
             else:
                 log.info("Data loading skipped")
@@ -254,13 +234,13 @@ class CaseRunner(BaseModel):
         self.stop()
 
     @utils.time_it
-    def
+    def _optimize_task(self) -> None:
         with self.db.init():
-            self.db.
+            self.db.optimize(data_size=self.ca.dataset.data.size)
 
     def _optimize(self) -> float:
         with concurrent.futures.ProcessPoolExecutor(max_workers=1) as executor:
-            future = executor.submit(self.
+            future = executor.submit(self._optimize_task)
             try:
                 return future.result(timeout=self.ca.optimize_timeout)[1]
             except TimeoutError as e:
vectordb_bench/frontend/components/custom/displaypPrams.py
CHANGED
@@ -3,7 +3,7 @@ def displayParams(st):
         """
 - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
   - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
-  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+  - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
   - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
 
 - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
@@ -11,3 +11,14 @@ def displayParams(st):
 - `Use Shuffled Data` - If you check this option, the vector data files need to be modified. VectorDBBench will load the data labeled with `shuffle`. For example, use `shuffle_train.parquet` instead of `train.parquet` and `shuffle_train-04-of-10.parquet` instead of `train-04-of-10.parquet`. The `id` column in the shuffled data can be in any order.
     """
     )
+    st.caption(
+        """We recommend limiting the number of test query vectors, like 1,000.""",
+        help="""
+        When conducting concurrent query tests, Vdbbench creates a large number of processes.
+        To minimize additional communication overhead during testing,
+        we prepare a complete set of test queries for each process, allowing them to run independently.\n
+        However, this means that as the number of concurrent processes increases,
+        the number of copied query vectors also increases significantly,
+        which can place substantial pressure on memory resources.
+        """,
+    )
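The dataset layout described in this help text can be produced with pandas plus a parquet engine such as pyarrow. The following is an illustrative sketch with toy sizes and random vectors, not code from the package; the file and column names follow the docstring above, and the `neighbors_id` values here are random placeholders (real ground truth must contain the actual nearest-neighbor ids):

```python
import numpy as np
import pandas as pd  # to_parquet needs a parquet engine, e.g. pyarrow

dim, n_train, n_test, k = 768, 10_000, 1_000, 100
rng = np.random.default_rng(0)

# train.parquet: incrementing int `id`, `emb` as an array of float32 per row
train = pd.DataFrame({
    "id": np.arange(n_train),
    "emb": [rng.random(dim, dtype=np.float32) for _ in range(n_train)],
})
train.to_parquet("train.parquet", index=False)

# test.parquet: same schema; keep it small, as the caption above recommends (~1,000 queries)
test = pd.DataFrame({
    "id": np.arange(n_test),
    "emb": [rng.random(dim, dtype=np.float32) for _ in range(n_test)],
})
test.to_parquet("test.parquet", index=False)

# neighbors.parquet: `id` of each query and `neighbors_id`, the ids of its true top-k neighbors
neighbors = pd.DataFrame({
    "id": test["id"],
    "neighbors_id": [rng.choice(n_train, size=k, replace=False) for _ in range(n_test)],
})
neighbors.to_parquet("neighbors.parquet", index=False)
```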
vectordb_bench/frontend/components/run_test/submitTask.py
CHANGED
@@ -1,6 +1,8 @@
 from datetime import datetime
+from vectordb_bench import config
 from vectordb_bench.frontend.config import styles
 from vectordb_bench.interface import benchmark_runner
+from vectordb_bench.models import TaskConfig
 
 
 def submitTask(st, tasks, isAllValid):
@@ -47,16 +49,31 @@ def advancedSettings(st):
     k = container[0].number_input("k", min_value=1, value=100, label_visibility="collapsed")
     container[1].caption("K value for number of nearest neighbors to search")
 
-
+    container = st.columns([1, 2])
+    defaultconcurrentInput = ",".join(map(str, config.NUM_CONCURRENCY))
+    concurrentInput = container[0].text_input(
+        "Concurrent Input", value=defaultconcurrentInput, label_visibility="collapsed"
+    )
+    container[1].caption("num of concurrencies for search tests to get max-qps")
+    return index_already_exists, use_aliyun, k, concurrentInput
 
 
-def controlPanel(st, tasks, taskLabel, isAllValid):
-    index_already_exists, use_aliyun, k = advancedSettings(st)
+def controlPanel(st, tasks: list[TaskConfig], taskLabel, isAllValid):
+    index_already_exists, use_aliyun, k, concurrentInput = advancedSettings(st)
 
     def runHandler():
         benchmark_runner.set_drop_old(not index_already_exists)
+
+        try:
+            concurrentInput_list = [int(item.strip()) for item in concurrentInput.split(",")]
+        except ValueError:
+            st.write("please input correct number")
+            return None
+
         for task in tasks:
             task.case_config.k = k
+            task.case_config.concurrency_search_config.num_concurrency = concurrentInput_list
+
         benchmark_runner.set_download_address(use_aliyun)
         benchmark_runner.run(tasks, taskLabel)
 
vectordb_bench/frontend/config/dbCaseConfigs.py
CHANGED
@@ -1041,6 +1041,26 @@ CaseConfigParamInput_NumCandidates_AliES = CaseConfigInput(
 )
 
 
+CaseConfigParamInput_MongoDBQuantizationType = CaseConfigInput(
+    label=CaseConfigParamType.mongodb_quantization_type,
+    inputType=InputType.Option,
+    inputConfig={
+        "options": ["none", "scalar", "binary"],
+    },
+)
+
+
+CaseConfigParamInput_MongoDBNumCandidatesRatio = CaseConfigInput(
+    label=CaseConfigParamType.mongodb_num_candidates_ratio,
+    inputType=InputType.Number,
+    inputConfig={
+        "min": 10,
+        "max": 20,
+        "value": 10,
+    },
+)
+
+
 MilvusLoadConfig = [
     CaseConfigParamInput_IndexType,
     CaseConfigParamInput_M,
@@ -1224,6 +1244,14 @@ AliyunElasticsearchPerformanceConfig = [
     CaseConfigParamInput_NumCandidates_AliES,
 ]
 
+MongoDBLoadingConfig = [
+    CaseConfigParamInput_MongoDBQuantizationType,
+]
+MongoDBPerformanceConfig = [
+    CaseConfigParamInput_MongoDBQuantizationType,
+    CaseConfigParamInput_MongoDBNumCandidatesRatio,
+]
+
 CASE_CONFIG_MAP = {
     DB.Milvus: {
         CaseLabel.Load: MilvusLoadConfig,
@@ -1272,4 +1300,8 @@ CASE_CONFIG_MAP = {
         CaseLabel.Load: AliyunOpensearchLoadingConfig,
         CaseLabel.Performance: AliyunOpenSearchPerformanceConfig,
     },
+    DB.MongoDB: {
+        CaseLabel.Load: MongoDBLoadingConfig,
+        CaseLabel.Performance: MongoDBPerformanceConfig,
+    },
 }
vectordb_bench/interface.py
CHANGED
@@ -65,9 +65,7 @@ class BenchMarkRunner:
             log.warning("Empty tasks submitted")
             return False
 
-        log.debug(
-            f"tasks: {tasks}, task_label: {task_label}, dataset source: {self.dataset_source}",
-        )
+        log.debug(f"tasks: {tasks}, task_label: {task_label}, dataset source: {self.dataset_source}")
 
         # Generate run_id
         run_id = uuid.uuid4().hex
@@ -169,14 +167,13 @@ class BenchMarkRunner:
             drop_old = TaskStage.DROP_OLD in runner.config.stages
             if (latest_runner and runner == latest_runner) or not self.drop_old:
                 drop_old = False
+            num_cases = running_task.num_cases()
             try:
-                log.info(
-                    f"[{idx+1}/{running_task.num_cases()}] start case: {runner.display()}, drop_old={drop_old}",
-                )
+                log.info(f"[{idx+1}/{num_cases}] start case: {runner.display()}, drop_old={drop_old}")
                 case_res.metrics = runner.run(drop_old)
                 log.info(
-                    f"[{idx+1}/{
-                    f"result={case_res.metrics}, label={case_res.label}"
+                    f"[{idx+1}/{num_cases}] finish case: {runner.display()}, "
+                    f"result={case_res.metrics}, label={case_res.label}"
                 )
 
                 # cache the latest succeeded runner
@@ -189,16 +186,12 @@ class BenchMarkRunner:
                 if not drop_old:
                     case_res.metrics.load_duration = cached_load_duration if cached_load_duration else 0.0
             except (LoadTimeoutError, PerformanceTimeoutError) as e:
-                log.warning(
-                    f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
-                )
+                log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
                 case_res.label = ResultLabel.OUTOFRANGE
                 continue
 
             except Exception as e:
-                log.warning(
-                    f"[{idx+1}/{running_task.num_cases()}] case {runner.display()} failed to run, reason={e}",
-                )
+                log.warning(f"[{idx+1}/{num_cases}] case {runner.display()} failed to run, reason={e}")
                 traceback.print_exc()
                 case_res.label = ResultLabel.FAILED
                 continue
@@ -217,9 +210,7 @@ class BenchMarkRunner:
 
             send_conn.send((SIGNAL.SUCCESS, None))
             send_conn.close()
-            log.info(
-                f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}",
-            )
+            log.info(f"Success to finish task: label={running_task.task_label}, run_id={running_task.run_id}")
 
         except Exception as e:
             err_msg = (
@@ -249,8 +240,8 @@ class BenchMarkRunner:
 
     def _run_async(self, conn: Connection) -> bool:
         log.info(
-            f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, "
-            f"case number: {len(self.running_task.case_runners)}"
+            f"task submitted: id={self.running_task.run_id}, {self.running_task.task_label}, "
+            f"case number: {len(self.running_task.case_runners)}"
         )
         global global_result_future
         executor = concurrent.futures.ProcessPoolExecutor(
vectordb_bench/log_util.py
CHANGED
@@ -1,8 +1,13 @@
 import logging
 from logging import config
+from pathlib import Path
 
 
 def init(log_level: str):
+    # Create logs directory if it doesn't exist
+    log_dir = Path("logs")
+    log_dir.mkdir(exist_ok=True)
+
     log_config = {
         "version": 1,
         "disable_existing_loggers": False,
@@ -24,15 +29,23 @@ def init(log_level: str):
                 "class": "logging.StreamHandler",
                 "formatter": "default",
             },
+            "file": {
+                "class": "logging.handlers.RotatingFileHandler",
+                "formatter": "default",
+                "filename": "logs/vectordb_bench.log",
+                "maxBytes": 10485760,  # 10MB
+                "backupCount": 5,
+                "encoding": "utf8",
+            },
         },
         "loggers": {
             "vectordb_bench": {
-                "handlers": ["console"],
+                "handlers": ["console", "file"],
                 "level": log_level,
                 "propagate": False,
             },
             "no_color": {
-                "handlers": ["no_color_console"],
+                "handlers": ["no_color_console", "file"],
                 "level": log_level,
                 "propagate": False,
             },
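The log_util.py change routes both loggers to a size-rotated log file alongside the existing console handlers. For anyone who wants the same behaviour outside the benchmark, here is a minimal standalone sketch of that dictConfig pattern; the logger name, format string, and paths are illustrative rather than the package's own:

```python
import logging
import logging.config
from pathlib import Path

Path("logs").mkdir(exist_ok=True)  # RotatingFileHandler does not create the directory itself

logging.config.dictConfig({
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {"default": {"format": "%(asctime)s | %(levelname)s | %(message)s"}},
    "handlers": {
        "console": {"class": "logging.StreamHandler", "formatter": "default"},
        "file": {
            "class": "logging.handlers.RotatingFileHandler",
            "formatter": "default",
            "filename": "logs/example.log",
            "maxBytes": 10 * 1024 * 1024,  # rotate at ~10MB
            "backupCount": 5,              # keep example.log.1 ... example.log.5
            "encoding": "utf8",
        },
    },
    "loggers": {"example": {"handlers": ["console", "file"], "level": "INFO", "propagate": False}},
})

logging.getLogger("example").info("goes to the console and to logs/example.log")
```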
vectordb_bench/models.py
CHANGED
@@ -88,6 +88,10 @@ class CaseConfigParamType(Enum):
     numSearchThreads = "num_search_threads"
     maxNumPrefetchDatasets = "max_num_prefetch_datasets"
 
+    # mongodb params
+    mongodb_quantization_type = "quantization"
+    mongodb_num_candidates_ratio = "num_candidates_ratio"
+
 
 class CustomizedCase(BaseModel):
     pass
{vectordb_bench-0.0.20.dist-info → vectordb_bench-0.0.22.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: vectordb-bench
-Version: 0.0.20
+Version: 0.0.22
 Summary: VectorDBBench is not just an offering of benchmark results for mainstream vector databases and cloud services, it's your go-to tool for the ultimate performance and cost-effectiveness comparison. Designed with ease-of-use in mind, VectorDBBench is devised to help users, even non-professionals, reproduce results or test new systems, making the hunt for the optimal choice amongst a plethora of cloud services and open-source vector databases a breeze.
 Author-email: XuanYang-cn <xuan.yang@zilliz.com>
 Project-URL: repository, https://github.com/zilliztech/VectorDBBench
@@ -21,7 +21,7 @@ Requires-Dist: oss2
 Requires-Dist: psutil
 Requires-Dist: polars
 Requires-Dist: plotly
-Requires-Dist: environs
+Requires-Dist: environs<14.1.0
 Requires-Dist: pydantic<v2
 Requires-Dist: scikit-learn
 Requires-Dist: pymilvus
@@ -73,6 +73,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: aliyun-opensearch
 Requires-Dist: alibabacloud_ha3engine_vector; extra == "aliyun-opensearch"
 Requires-Dist: alibabacloud_searchengine20211025; extra == "aliyun-opensearch"
+Provides-Extra: mongodb
+Requires-Dist: pymongo; extra == "mongodb"
 
 # VectorDBBench: A Benchmark Tool for VectorDB
 
@@ -89,6 +91,8 @@ Closely mimicking real-world production environments, we've set up diverse testi
 
 Prepare to delve into the world of VectorDBBench, and let it guide you in uncovering your perfect vector database match.
 
+VectorDBBench is sponsered by Zilliz,the leading opensource vectorDB company behind Milvus. Choose smarter with VectorDBBench- start your free test on [zilliz cloud](https://zilliz.com/) today!
+
 **Leaderboard:** https://zilliz.com/benchmark
 ## Quick Start
 ### Prerequirement
@@ -128,6 +132,7 @@ All the database client supported
 | chromadb | `pip install vectordb-bench[chromadb]` |
 | awsopensearch | `pip install vectordb-bench[opensearch]` |
 | aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
+| mongodb | `pip install vectordb-bench[mongodb]` |
 
 ### Run
 
@@ -228,6 +233,47 @@ Options:
               with-gt]
   --help      Show this message and exit.
 ```
+
+### Run awsopensearch from command line
+
+```shell
+vectordbbench awsopensearch --db-label awsopensearch \
+ --m 16 --ef-construction 256 \
+ --host search-vector-db-prod-h4f6m4of6x7yp2rz7gdmots7w4.us-west-2.es.amazonaws.com --port 443 \
+ --user vector --password '<password>' \
+ --case-type Performance1536D5M --num-insert-workers 10 \
+ --skip-load --num-concurrency 75
+```
+
+To list the options for awsopensearch, execute `vectordbbench awsopensearch --help`
+
+```text
+$ vectordbbench awsopensearch --help
+Usage: vectordbbench awsopensearch [OPTIONS]
+
+Options:
+  # Sharding and Replication
+  --number-of-shards INTEGER      Number of primary shards for the index
+  --number-of-replicas INTEGER    Number of replica copies for each primary
+                                  shard
+  # Indexing Performance
+  --index-thread-qty INTEGER      Thread count for native engine indexing
+  --index-thread-qty-during-force-merge INTEGER
+                                  Thread count during force merge operations
+  --number-of-indexing-clients INTEGER
+                                  Number of concurrent indexing clients
+  # Index Management
+  --number-of-segments INTEGER    Target number of segments after merging
+  --refresh-interval TEXT         How often to make new data available for
+                                  search
+  --force-merge-enabled BOOLEAN   Whether to perform force merge operation
+  --flush-threshold-size TEXT     Size threshold for flushing the transaction
+                                  log
+  # Memory Management
+  --cb-threshold TEXT             k-NN Memory circuit breaker threshold
+
+  --help                          Show this message and exit.```
+
 #### Using a configuration file.
 
 The vectordbbench command can optionally read some or all the options from a yaml formatted configuration file.
@@ -394,6 +440,13 @@ We have strict requirements for the data set format, please follow them.
 - `Folder Path` - The path to the folder containing all the files. Please ensure that all files in the folder are in the `Parquet` format.
   - Vectors data files: The file must be named `train.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
   - Query test vectors: The file must be named `test.parquet` and should have two columns: `id` as an incrementing `int` and `emb` as an array of `float32`.
+    - We recommend limiting the number of test query vectors, like 1,000.
+      When conducting concurrent query tests, Vdbbench creates a large number of processes.
+      To minimize additional communication overhead during testing,
+      we prepare a complete set of test queries for each process, allowing them to run independently.
+      However, this means that as the number of concurrent processes increases,
+      the number of copied query vectors also increases significantly,
+      which can place substantial pressure on memory resources.
   - Ground truth file: The file must be named `neighbors.parquet` and should have two columns: `id` corresponding to query vectors and `neighbors_id` as an array of `int`.
 
 - `Train File Count` - If the vector file is too large, you can consider splitting it into multiple files. The naming format for the split files should be `train-[index]-of-[file_count].parquet`. For example, `train-01-of-10.parquet` represents the second file (0-indexed) among 10 split files.
|