vectordb-bench 0.0.29__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. vectordb_bench/__init__.py +14 -27
  2. vectordb_bench/backend/assembler.py +19 -6
  3. vectordb_bench/backend/cases.py +186 -23
  4. vectordb_bench/backend/clients/__init__.py +32 -0
  5. vectordb_bench/backend/clients/api.py +22 -1
  6. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +249 -43
  7. vectordb_bench/backend/clients/aws_opensearch/cli.py +51 -21
  8. vectordb_bench/backend/clients/aws_opensearch/config.py +58 -16
  9. vectordb_bench/backend/clients/chroma/chroma.py +6 -2
  10. vectordb_bench/backend/clients/elastic_cloud/config.py +19 -1
  11. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +133 -45
  12. vectordb_bench/backend/clients/lancedb/cli.py +62 -8
  13. vectordb_bench/backend/clients/lancedb/config.py +14 -1
  14. vectordb_bench/backend/clients/lancedb/lancedb.py +21 -9
  15. vectordb_bench/backend/clients/memorydb/memorydb.py +2 -2
  16. vectordb_bench/backend/clients/milvus/cli.py +30 -9
  17. vectordb_bench/backend/clients/milvus/config.py +3 -0
  18. vectordb_bench/backend/clients/milvus/milvus.py +81 -23
  19. vectordb_bench/backend/clients/oceanbase/cli.py +100 -0
  20. vectordb_bench/backend/clients/oceanbase/config.py +125 -0
  21. vectordb_bench/backend/clients/oceanbase/oceanbase.py +215 -0
  22. vectordb_bench/backend/clients/pinecone/pinecone.py +39 -25
  23. vectordb_bench/backend/clients/qdrant_cloud/config.py +59 -3
  24. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +100 -33
  25. vectordb_bench/backend/clients/qdrant_local/cli.py +60 -0
  26. vectordb_bench/backend/clients/qdrant_local/config.py +47 -0
  27. vectordb_bench/backend/clients/qdrant_local/qdrant_local.py +232 -0
  28. vectordb_bench/backend/clients/weaviate_cloud/cli.py +29 -3
  29. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -0
  30. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +5 -0
  31. vectordb_bench/backend/dataset.py +143 -27
  32. vectordb_bench/backend/filter.py +76 -0
  33. vectordb_bench/backend/runner/__init__.py +3 -3
  34. vectordb_bench/backend/runner/mp_runner.py +52 -39
  35. vectordb_bench/backend/runner/rate_runner.py +68 -52
  36. vectordb_bench/backend/runner/read_write_runner.py +125 -68
  37. vectordb_bench/backend/runner/serial_runner.py +56 -23
  38. vectordb_bench/backend/task_runner.py +48 -20
  39. vectordb_bench/cli/batch_cli.py +121 -0
  40. vectordb_bench/cli/cli.py +59 -1
  41. vectordb_bench/cli/vectordbbench.py +7 -0
  42. vectordb_bench/config-files/batch_sample_config.yml +17 -0
  43. vectordb_bench/frontend/components/check_results/data.py +16 -11
  44. vectordb_bench/frontend/components/check_results/filters.py +53 -25
  45. vectordb_bench/frontend/components/check_results/headerIcon.py +16 -13
  46. vectordb_bench/frontend/components/check_results/nav.py +20 -0
  47. vectordb_bench/frontend/components/custom/displayCustomCase.py +43 -8
  48. vectordb_bench/frontend/components/custom/displaypPrams.py +10 -5
  49. vectordb_bench/frontend/components/custom/getCustomConfig.py +10 -0
  50. vectordb_bench/frontend/components/label_filter/charts.py +60 -0
  51. vectordb_bench/frontend/components/run_test/caseSelector.py +48 -52
  52. vectordb_bench/frontend/components/run_test/dbSelector.py +9 -5
  53. vectordb_bench/frontend/components/run_test/inputWidget.py +48 -0
  54. vectordb_bench/frontend/components/run_test/submitTask.py +3 -1
  55. vectordb_bench/frontend/components/streaming/charts.py +253 -0
  56. vectordb_bench/frontend/components/streaming/data.py +62 -0
  57. vectordb_bench/frontend/components/tables/data.py +1 -1
  58. vectordb_bench/frontend/components/welcome/explainPrams.py +66 -0
  59. vectordb_bench/frontend/components/welcome/pagestyle.py +106 -0
  60. vectordb_bench/frontend/components/welcome/welcomePrams.py +147 -0
  61. vectordb_bench/frontend/config/dbCaseConfigs.py +420 -41
  62. vectordb_bench/frontend/config/styles.py +32 -2
  63. vectordb_bench/frontend/pages/concurrent.py +5 -1
  64. vectordb_bench/frontend/pages/custom.py +4 -0
  65. vectordb_bench/frontend/pages/label_filter.py +56 -0
  66. vectordb_bench/frontend/pages/quries_per_dollar.py +5 -1
  67. vectordb_bench/frontend/pages/results.py +60 -0
  68. vectordb_bench/frontend/pages/run_test.py +3 -3
  69. vectordb_bench/frontend/pages/streaming.py +135 -0
  70. vectordb_bench/frontend/pages/tables.py +4 -0
  71. vectordb_bench/frontend/vdb_benchmark.py +16 -41
  72. vectordb_bench/interface.py +6 -2
  73. vectordb_bench/metric.py +15 -1
  74. vectordb_bench/models.py +38 -11
  75. vectordb_bench/results/ElasticCloud/result_20250318_standard_elasticcloud.json +5890 -0
  76. vectordb_bench/results/Milvus/result_20250509_standard_milvus.json +6138 -0
  77. vectordb_bench/results/OpenSearch/result_20250224_standard_opensearch.json +7319 -0
  78. vectordb_bench/results/Pinecone/result_20250124_standard_pinecone.json +2365 -0
  79. vectordb_bench/results/QdrantCloud/result_20250602_standard_qdrantcloud.json +3556 -0
  80. vectordb_bench/results/ZillizCloud/result_20250613_standard_zillizcloud.json +6290 -0
  81. vectordb_bench/results/dbPrices.json +12 -4
  82. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/METADATA +131 -32
  83. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/RECORD +87 -65
  84. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/WHEEL +1 -1
  85. vectordb_bench/results/ZillizCloud/result_20230727_standard_zillizcloud.json +0 -791
  86. vectordb_bench/results/ZillizCloud/result_20230808_standard_zillizcloud.json +0 -679
  87. vectordb_bench/results/ZillizCloud/result_20240105_standard_202401_zillizcloud.json +0 -1352
  88. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/entry_points.txt +0 -0
  89. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/licenses/LICENSE +0 -0
  90. {vectordb_bench-0.0.29.dist-info → vectordb_bench-1.0.0.dist-info}/top_level.txt +0 -0
vectordb_bench/metric.py CHANGED
@@ -13,8 +13,12 @@ class Metric:
13
13
  # for load cases
14
14
  max_load_count: int = 0
15
15
 
16
+ # for both performance and streaming cases
17
+ insert_duration: float = 0.0
18
+ optimize_duration: float = 0.0
19
+ load_duration: float = 0.0 # insert + optimize
20
+
16
21
  # for performance cases
17
- load_duration: float = 0.0 # duration to load all dataset into DB
18
22
  qps: float = 0.0
19
23
  serial_latency_p99: float = 0.0
20
24
  recall: float = 0.0
@@ -24,6 +28,16 @@ class Metric:
24
28
  conc_latency_p99_list: list[float] = field(default_factory=list)
25
29
  conc_latency_avg_list: list[float] = field(default_factory=list)
26
30
 
31
+ # for streaming cases
32
+ st_ideal_insert_duration: int = 0
33
+ st_search_stage_list: list[int] = field(default_factory=list)
34
+ st_search_time_list: list[float] = field(default_factory=list)
35
+ st_max_qps_list_list: list[float] = field(default_factory=list)
36
+ st_recall_list: list[float] = field(default_factory=list)
37
+ st_ndcg_list: list[float] = field(default_factory=list)
38
+ st_serial_latency_p99_list: list[float] = field(default_factory=list)
39
+ st_conc_failed_rate_list: list[float] = field(default_factory=list)
40
+
27
41
 
28
42
  QURIES_PER_DOLLAR_METRIC = "QP$ (Quries per Dollar)"
29
43
  LOAD_DURATION_METRIC = "load_duration"
vectordb_bench/models.py CHANGED
@@ -1,13 +1,13 @@
1
1
  import logging
2
2
  import pathlib
3
3
  from datetime import date, datetime
4
- from enum import Enum, StrEnum, auto
4
+ from enum import Enum, StrEnum
5
5
  from typing import Self
6
6
 
7
7
  import ujson
8
8
 
9
9
  from . import config
10
- from .backend.cases import CaseType
10
+ from .backend.cases import Case, CaseType
11
11
  from .backend.clients import (
12
12
  DB,
13
13
  DBCaseConfig,
@@ -105,10 +105,27 @@ class CaseConfigParamType(Enum):
105
105
  num_partitions = "num_partitions"
106
106
  num_sub_vectors = "num_sub_vectors"
107
107
  sample_rate = "sample_rate"
108
-
109
- # mongodb params
108
+ index_thread_qty_during_force_merge = "index_thread_qty_during_force_merge"
109
+ number_of_indexing_clients = "number_of_indexing_clients"
110
+ number_of_shards = "number_of_shards"
111
+ number_of_replicas = "number_of_replicas"
112
+ index_thread_qty = "index_thread_qty"
113
+ engine_name = "engine_name"
114
+ metric_type_name = "metric_type_name"
110
115
  mongodb_quantization_type = "quantization"
111
116
  mongodb_num_candidates_ratio = "num_candidates_ratio"
117
+ use_partition_key = "use_partition_key"
118
+ refresh_interval = "refresh_interval"
119
+ use_rescore = "use_rescore"
120
+ oversample_ratio = "oversample_ratio"
121
+ use_routing = "use_routing"
122
+
123
+ dataset_with_size_type = "dataset_with_size_type"
124
+ insert_rate = "insert_rate"
125
+ search_stages = "search_stages"
126
+ concurrencies = "concurrencies"
127
+ optimize_after_write = "optimize_after_write"
128
+ read_dur_after_write = "read_dur_after_write"
112
129
 
113
130
 
114
131
  class CustomizedCase(BaseModel):
@@ -144,14 +161,22 @@ class CaseConfig(BaseModel):
144
161
  def __hash__(self) -> int:
145
162
  return hash(self.json())
146
163
 
164
+ @property
165
+ def case(self) -> Case:
166
+ return self.case_id.case_cls(self.custom_case)
167
+
168
+ @property
169
+ def case_name(self) -> str:
170
+ return self.case.name
171
+
147
172
 
148
173
  class TaskStage(StrEnum):
149
174
  """Enumerations of various stages of the task"""
150
175
 
151
- DROP_OLD = auto()
152
- LOAD = auto()
153
- SEARCH_SERIAL = auto()
154
- SEARCH_CONCURRENT = auto()
176
+ DROP_OLD = "drop_old"
177
+ LOAD = "load"
178
+ SEARCH_SERIAL = "search_serial"
179
+ SEARCH_CONCURRENT = "search_concurrent"
155
180
 
156
181
  def __repr__(self) -> str:
157
182
  return str.__repr__(self.value)
@@ -292,12 +317,14 @@ class TestResult(BaseModel):
292
317
  key=lambda x: (
293
318
  x.task_config.db.name,
294
319
  x.task_config.db_config.db_label,
295
- x.task_config.case_config.case_id.name,
320
+ x.task_config.case_config.case_name,
296
321
  ),
297
322
  reverse=True,
298
323
  )
299
324
 
300
325
  filtered_results = [r for r in sorted_results if not filter_list or r.task_config.db not in filter_list]
326
+ if len(filtered_results) == 0:
327
+ return
301
328
 
302
329
  def append_return(x: any, y: any):
303
330
  x.append(y)
@@ -305,7 +332,7 @@ class TestResult(BaseModel):
305
332
 
306
333
  max_db = max(map(len, [f.task_config.db.name for f in filtered_results]))
307
334
  max_db_labels = max(map(len, [f.task_config.db_config.db_label for f in filtered_results])) + 3
308
- max_case = max(map(len, [f.task_config.case_config.case_id.name for f in filtered_results]))
335
+ max_case = max(map(len, [f.task_config.case_config.case_name for f in filtered_results]))
309
336
  max_load_dur = max(map(len, [str(f.metrics.load_duration) for f in filtered_results])) + 3
310
337
  max_qps = max(map(len, [str(f.metrics.qps) for f in filtered_results])) + 3
311
338
  max_recall = max(map(len, [str(f.metrics.recall) for f in filtered_results])) + 3
@@ -359,7 +386,7 @@ class TestResult(BaseModel):
359
386
  % (
360
387
  f.task_config.db.name,
361
388
  f.task_config.db_config.db_label,
362
- f.task_config.case_config.case_id.name,
389
+ f.task_config.case_config.case_name,
363
390
  self.task_label,
364
391
  f.metrics.load_duration,
365
392
  f.metrics.qps,