vectordb-bench 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. vectordb_bench/backend/cases.py +45 -1
  2. vectordb_bench/backend/clients/__init__.py +32 -0
  3. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +34 -42
  4. vectordb_bench/backend/clients/aliyun_opensearch/config.py +0 -7
  5. vectordb_bench/backend/clients/milvus/cli.py +216 -0
  6. vectordb_bench/backend/clients/oss_opensearch/cli.py +155 -0
  7. vectordb_bench/backend/clients/oss_opensearch/config.py +157 -0
  8. vectordb_bench/backend/clients/oss_opensearch/oss_opensearch.py +582 -0
  9. vectordb_bench/backend/clients/oss_opensearch/run.py +166 -0
  10. vectordb_bench/backend/clients/s3_vectors/config.py +41 -0
  11. vectordb_bench/backend/clients/s3_vectors/s3_vectors.py +171 -0
  12. vectordb_bench/backend/clients/tidb/cli.py +0 -4
  13. vectordb_bench/backend/clients/tidb/config.py +22 -2
  14. vectordb_bench/backend/dataset.py +70 -0
  15. vectordb_bench/backend/filter.py +17 -0
  16. vectordb_bench/backend/runner/mp_runner.py +4 -0
  17. vectordb_bench/backend/runner/read_write_runner.py +10 -9
  18. vectordb_bench/backend/runner/serial_runner.py +23 -7
  19. vectordb_bench/backend/task_runner.py +5 -4
  20. vectordb_bench/cli/vectordbbench.py +2 -0
  21. vectordb_bench/fig/custom_case_run_test.png +0 -0
  22. vectordb_bench/fig/custom_dataset.png +0 -0
  23. vectordb_bench/fig/homepage/bar-chart.png +0 -0
  24. vectordb_bench/fig/homepage/concurrent.png +0 -0
  25. vectordb_bench/fig/homepage/custom.png +0 -0
  26. vectordb_bench/fig/homepage/label_filter.png +0 -0
  27. vectordb_bench/fig/homepage/qp$.png +0 -0
  28. vectordb_bench/fig/homepage/run_test.png +0 -0
  29. vectordb_bench/fig/homepage/streaming.png +0 -0
  30. vectordb_bench/fig/homepage/table.png +0 -0
  31. vectordb_bench/fig/run_test_select_case.png +0 -0
  32. vectordb_bench/fig/run_test_select_db.png +0 -0
  33. vectordb_bench/fig/run_test_submit.png +0 -0
  34. vectordb_bench/frontend/components/check_results/filters.py +1 -4
  35. vectordb_bench/frontend/components/check_results/nav.py +2 -1
  36. vectordb_bench/frontend/components/concurrent/charts.py +5 -0
  37. vectordb_bench/frontend/components/int_filter/charts.py +60 -0
  38. vectordb_bench/frontend/components/streaming/data.py +7 -0
  39. vectordb_bench/frontend/components/welcome/welcomePrams.py +42 -4
  40. vectordb_bench/frontend/config/dbCaseConfigs.py +60 -13
  41. vectordb_bench/frontend/config/styles.py +3 -0
  42. vectordb_bench/frontend/pages/concurrent.py +1 -1
  43. vectordb_bench/frontend/pages/custom.py +1 -1
  44. vectordb_bench/frontend/pages/int_filter.py +56 -0
  45. vectordb_bench/frontend/pages/streaming.py +16 -3
  46. vectordb_bench/metric.py +7 -0
  47. vectordb_bench/models.py +36 -4
  48. vectordb_bench/results/S3Vectors/result_20250722_standard_s3vectors.json +2509 -0
  49. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/METADATA +1 -3
  50. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/RECORD +54 -32
  51. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/WHEEL +0 -0
  52. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/entry_points.txt +0 -0
  53. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/licenses/LICENSE +0 -0
  54. {vectordb_bench-1.0.3.dist-info → vectordb_bench-1.0.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,155 @@
1
+ import logging
2
+ from typing import Annotated, TypedDict, Unpack
3
+
4
+ import click
5
+ from pydantic import SecretStr
6
+
7
+ from ....cli.cli import (
8
+ CommonTypedDict,
9
+ HNSWFlavor1,
10
+ cli,
11
+ click_parameter_decorators_from_typed_dict,
12
+ run,
13
+ )
14
+ from .. import DB
15
+ from .config import OSSOpenSearchQuantization, OSSOS_Engine
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
class OSSOpenSearchTypedDict(TypedDict):
    """Command-line options specific to the OSS OpenSearch benchmark command.

    Every key is annotated with the ``click.option`` that populates it.
    Each option is declared exactly once: the ``engine`` key used to be
    declared twice, so the later declaration silently replaced the earlier
    one and exposed a copy-pasted quantization help text.
    """

    # --- connection ---
    host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
    port: Annotated[int, click.option("--port", type=int, default=80, help="Db Port")]
    user: Annotated[str, click.option("--user", type=str, help="Db User")]
    password: Annotated[str, click.option("--password", type=str, help="Db password")]

    # --- index layout ---
    number_of_shards: Annotated[
        int,
        click.option("--number-of-shards", type=int, help="Number of primary shards for the index", default=1),
    ]
    number_of_replicas: Annotated[
        int,
        click.option(
            "--number-of-replicas", type=int, help="Number of replica copies for each primary shard", default=1
        ),
    ]
    index_thread_qty: Annotated[
        int,
        click.option(
            "--index-thread-qty",
            type=int,
            help="Thread count for native engine indexing",
            default=4,
        ),
    ]
    # The command reads parameters["index_thread_qty_during_force_merge"],
    # so the option has to exist; the default of 8 mirrors the default of the
    # matching OSSOpenSearchIndexConfig field.
    index_thread_qty_during_force_merge: Annotated[
        int,
        click.option(
            "--index-thread-qty-during-force-merge",
            type=int,
            help="Thread count for native engine indexing during force merge",
            default=8,
        ),
    ]

    # Only the engines the index config can represent are offered (this is
    # the choice list that was effectively in force before the cleanup).
    engine: Annotated[
        str,
        click.option(
            "--engine",
            type=click.Choice(["faiss", "lucene"]),
            help="HNSW algorithm implementation to use",
            default="faiss",
            required=False,
        ),
    ]

    metric_type: Annotated[
        str,
        click.option(
            "--metric-type",
            type=click.Choice(["l2", "cosine", "ip"], case_sensitive=False),
            help="Distance metric type for vector similarity",
            default="l2",
        ),
    ]

    # --- segment / refresh / merge behaviour ---
    number_of_segments: Annotated[
        int,
        click.option("--number-of-segments", type=int, help="Target number of segments after merging", default=1),
    ]

    refresh_interval: Annotated[
        str,
        click.option(
            "--refresh-interval", type=str, help="How often to make new data available for search", default="60s"
        ),
    ]

    force_merge_enabled: Annotated[
        bool,
        click.option("--force-merge-enabled", type=bool, help="Whether to perform force merge operation", default=True),
    ]

    flush_threshold_size: Annotated[
        str,
        click.option(
            "--flush-threshold-size", type=str, help="Size threshold for flushing the transaction log", default="5120mb"
        ),
    ]

    cb_threshold: Annotated[
        str,
        click.option(
            "--cb-threshold",
            type=str,
            help="k-NN Memory circuit breaker threshold",
            default="50%",
        ),
    ]

    # --- vector encoding ---
    quantization_type: Annotated[
        str | None,
        click.option(
            "--quantization-type",
            type=click.Choice(["fp32", "fp16"]),
            help="quantization type for vectors (in index)",
            default="fp32",
            required=False,
        ),
    ]
120
+
121
+
122
class OSSOpenSearchHNSWTypedDict(CommonTypedDict, OSSOpenSearchTypedDict, HNSWFlavor1):
    """Union of the shared, OSS OpenSearch and HNSWFlavor1 option sets; adds no keys of its own."""
123
+
124
+
125
@cli.command()
@click_parameter_decorators_from_typed_dict(OSSOpenSearchHNSWTypedDict)
def OSSOpenSearch(**parameters: Unpack[OSSOpenSearchHNSWTypedDict]):
    """Run the benchmark against an OSS OpenSearch cluster using an HNSW index."""
    # Imported lazily, as in the original, so the config module is only
    # loaded when this command actually runs.
    from .config import OSSOpenSearchConfig, OSSOpenSearchIndexConfig

    # --password is optional; wrapping a missing value would produce a
    # SecretStr holding None, so pass None straight through instead.
    raw_password = parameters["password"]
    password = SecretStr(raw_password) if raw_password else None

    case_config_kwargs = {
        "number_of_shards": parameters["number_of_shards"],
        "number_of_replicas": parameters["number_of_replicas"],
        "index_thread_qty": parameters["index_thread_qty"],
        "number_of_segments": parameters["number_of_segments"],
        "refresh_interval": parameters["refresh_interval"],
        "force_merge_enabled": parameters["force_merge_enabled"],
        "flush_threshold_size": parameters["flush_threshold_size"],
        "cb_threshold": parameters["cb_threshold"],
        "efConstruction": parameters["ef_construction"],
        "efSearch": parameters["ef_runtime"],
        "M": parameters["m"],
        "engine": OSSOS_Engine(parameters["engine"]),
        # Forward the --metric-type choice; the index config derives the
        # OpenSearch space type from metric_type_name.
        "metric_type_name": parameters["metric_type"],
        "quantization_type": OSSOpenSearchQuantization(parameters["quantization_type"]),
    }

    # Only override the force-merge thread count when the CLI supplied one;
    # otherwise the index config keeps its own default rather than the
    # command failing with a KeyError.
    force_merge_threads = parameters.get("index_thread_qty_during_force_merge")
    if force_merge_threads is not None:
        case_config_kwargs["index_thread_qty_during_force_merge"] = force_merge_threads

    run(
        db=DB.OSSOpenSearch,
        db_config=OSSOpenSearchConfig(
            host=parameters["host"],
            port=parameters["port"],
            user=parameters["user"],
            password=password,
        ),
        db_case_config=OSSOpenSearchIndexConfig(**case_config_kwargs),
        **parameters,
    )
@@ -0,0 +1,157 @@
1
+ import logging
2
+ from enum import Enum
3
+
4
+ from pydantic import BaseModel, SecretStr, root_validator, validator
5
+
6
+ from ..api import DBCaseConfig, DBConfig, MetricType
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+
11
class OSSOpenSearchConfig(DBConfig, BaseModel):
    """Connection settings for an OSS OpenSearch endpoint."""

    host: str = ""
    port: int = 80
    user: str | None = None
    password: SecretStr | None = None

    def to_dict(self) -> dict:
        """Return the connection settings as a keyword dictionary.

        TLS (and certificate verification) is switched on only when the port
        is 443. ``http_auth`` is a ``(user, password)`` tuple when both are
        non-empty and an empty tuple otherwise.
        """
        use_ssl = self.port == 443

        # Unwrap the secret before testing it: a SecretStr that wraps None
        # would make len() raise, whereas a truthiness check on the unwrapped
        # value treats None and "" alike as "no credentials".
        secret = self.password.get_secret_value() if self.password is not None else None
        http_auth = (self.user, secret) if self.user and secret else ()

        return {
            "hosts": [{"host": self.host, "port": self.port}],
            "http_auth": http_auth,
            "use_ssl": use_ssl,
            "http_compress": True,
            "verify_certs": use_ssl,
            "ssl_assert_hostname": False,
            "ssl_show_warn": False,
            "timeout": 600,
        }

    @validator("*")
    def not_empty_field(cls, v: object, field: object):
        """Reject empty strings for every field that is not explicitly exempt.

        Exempt fields are the common short/long configs reported by the base
        class plus ``user``, ``password`` and ``host``.

        Raises:
            ValueError: if a non-exempt string-like value is empty.
        """
        exempt = (
            field.name in cls.common_short_configs()
            or field.name in cls.common_long_configs()
            or field.name in ["user", "password", "host"]
        )
        if exempt:
            return v
        if isinstance(v, str | SecretStr) and len(v) == 0:
            raise ValueError("Empty string!")
        return v
46
+
47
+
48
class OSSOS_Engine(Enum):
    """k-NN engines that can be selected for the HNSW index."""

    faiss = "faiss"
    lucene = "lucene"
51
+
52
+
53
class OSSOpenSearchQuantization(Enum):
    """Vector storage precisions selectable for the index."""

    fp32 = "fp32"
    fp16 = "fp16"
56
+
57
+
58
class OSSOpenSearchIndexConfig(BaseModel, DBCaseConfig):
    """Index and search settings for an OSS OpenSearch HNSW benchmark case."""

    metric_type: MetricType = MetricType.L2
    engine: OSSOS_Engine = OSSOS_Engine.faiss
    efConstruction: int = 256
    efSearch: int = 100
    engine_name: str | None = None
    metric_type_name: str | None = None
    M: int = 16
    index_thread_qty: int | None = 4
    number_of_shards: int | None = 1
    number_of_replicas: int | None = 0
    number_of_segments: int | None = 1
    refresh_interval: str | None = "60s"
    force_merge_enabled: bool | None = True
    flush_threshold_size: str | None = "5120mb"
    index_thread_qty_during_force_merge: int = 8
    cb_threshold: str | None = "50%"
    number_of_indexing_clients: int | None = 1
    use_routing: bool = False  # for label-filter cases
    oversample_factor: float = 1.0
    quantization_type: OSSOpenSearchQuantization = OSSOpenSearchQuantization.fp32

    @root_validator
    def validate_engine_name(cls, values: dict):
        """Map engine_name string from UI to engine enum"""
        if values.get("engine_name"):
            engine_name = values["engine_name"].lower()
            if engine_name == "faiss":
                values["engine"] = OSSOS_Engine.faiss
            elif engine_name == "lucene":
                values["engine"] = OSSOS_Engine.lucene
            else:
                log.warning(f"Unknown engine_name: {engine_name}, defaulting to faiss")
                values["engine"] = OSSOS_Engine.faiss
        return values

    def _identity(self) -> tuple:
        """Return the fields that define equality and hashing for a config."""
        return (
            self.engine,
            self.M,
            self.efConstruction,
            self.number_of_shards,
            self.number_of_replicas,
            self.number_of_segments,
            self.use_routing,
            self.quantization_type,
        )

    def __eq__(self, obj: object):
        """Compare on the identity fields only.

        Objects that are not an ``OSSOpenSearchIndexConfig`` yield
        ``NotImplemented`` instead of raising ``AttributeError``.
        """
        if not isinstance(obj, OSSOpenSearchIndexConfig):
            return NotImplemented
        return self._identity() == obj._identity()

    def __hash__(self) -> int:
        """Hash the same fields that ``__eq__`` compares."""
        return hash(self._identity())

    def parse_metric(self) -> str:
        """Return the space-type string for the configured metric.

        When ``metric_type_name`` is set it overrides ``metric_type``;
        when it is unset (the default) the current ``metric_type`` is used
        as-is instead of failing on ``None.upper()``.

        Raises:
            KeyError: if ``metric_type_name`` is not a ``MetricType`` member name.
        """
        log.info(f"User specified metric_type: {self.metric_type_name}")
        if self.metric_type_name:
            self.metric_type = MetricType[self.metric_type_name.upper()]
        if self.metric_type == MetricType.IP:
            return "innerproduct"
        if self.metric_type == MetricType.COSINE:
            return "cosinesimil"
        if self.metric_type == MetricType.L2:
            log.info("Using l2 as specified by user")
            return "l2"
        # Any other metric falls back to l2.
        return "l2"

    @property
    def use_quant(self) -> bool:
        """True when the quantization type is anything other than fp32."""
        return self.quantization_type is not OSSOpenSearchQuantization.fp32

    def index_param(self) -> dict:
        """Build the HNSW method definition used for index creation.

        The ``sq`` encoder entry is included only when quantization is in use.
        """
        log.info(f"Using engine: {self.engine} for index creation")
        log.info(f"Using metric_type: {self.metric_type_name} for index creation")
        # Resolve the space type once and reuse it, so the metric is not
        # parsed (and logged) twice.
        space_type = self.parse_metric()
        log.info(f"Resulting space_type: {space_type} for index creation")

        parameters = {
            "ef_construction": self.efConstruction,
            "m": self.M,
        }
        if self.use_quant:
            parameters["encoder"] = {"name": "sq", "parameters": {"type": self.quantization_type.value}}

        return {
            "name": "hnsw",
            "engine": self.engine.value,
            "space_type": space_type,
            "parameters": parameters,
        }

    def search_param(self) -> dict:
        """Return the search-time parameters (``ef_search``)."""
        return {"ef_search": self.efSearch}