vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (105)
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +55 -45
  5. vectordb_bench/backend/clients/__init__.py +85 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
  11. vectordb_bench/backend/clients/alloydb/cli.py +51 -34
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +13 -24
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +39 -40
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
  25. vectordb_bench/backend/clients/milvus/cli.py +41 -83
  26. vectordb_bench/backend/clients/milvus/config.py +18 -8
  27. vectordb_bench/backend/clients/milvus/milvus.py +19 -39
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +95 -62
  47. vectordb_bench/backend/clients/test/cli.py +2 -3
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +5 -9
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +18 -14
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +56 -23
  61. vectordb_bench/backend/runner/rate_runner.py +30 -19
  62. vectordb_bench/backend/runner/read_write_runner.py +46 -22
  63. vectordb_bench/backend/runner/serial_runner.py +81 -46
  64. vectordb_bench/backend/runner/util.py +4 -3
  65. vectordb_bench/backend/task_runner.py +92 -92
  66. vectordb_bench/backend/utils.py +17 -10
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +45 -24
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
  100. vectordb_bench-0.0.21.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.19.dist-info/RECORD +0 -135
  103. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,37 +1,40 @@
 import logging
 from contextlib import contextmanager
-from typing import Any, Type
-from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
-from .config import RedisConfig
+from typing import Any
+
+import numpy as np
 import redis
-from redis.commands.search.field import TagField, VectorField, NumericField
+from redis.commands.search.field import NumericField, TagField, VectorField
 from redis.commands.search.indexDefinition import IndexDefinition, IndexType
 from redis.commands.search.query import Query
-import numpy as np
 
+from ..api import DBCaseConfig, VectorDB
 
 log = logging.getLogger(__name__)
-INDEX_NAME = "index" # Vector Index Name
+INDEX_NAME = "index"  # Vector Index Name
+
 
 class Redis(VectorDB):
     def __init__(
-            self,
-            dim: int,
-            db_config: dict,
-            db_case_config: DBCaseConfig,
-            drop_old: bool = False,
-
-            **kwargs
-            ):
-
+        self,
+        dim: int,
+        db_config: dict,
+        db_case_config: DBCaseConfig,
+        drop_old: bool = False,
+        **kwargs,
+    ):
         self.db_config = db_config
         self.case_config = db_case_config
         self.collection_name = INDEX_NAME
 
         # Create a redis connection, if db has password configured, add it to the connection here and in init():
-        password=self.db_config["password"]
-        conn = redis.Redis(host=self.db_config["host"], port=self.db_config["port"], password=password, db=0)
-
+        password = self.db_config["password"]
+        conn = redis.Redis(
+            host=self.db_config["host"],
+            port=self.db_config["port"],
+            password=password,
+            db=0,
+        )
 
         if drop_old:
             try:
@@ -40,7 +43,7 @@ class Redis(VectorDB):
             except redis.exceptions.ResponseError:
                 drop_old = False
                 log.info(f"Redis client drop_old collection: {self.collection_name}")
-
+
         self.make_index(dim, conn)
         conn.close()
         conn = None
@@ -49,16 +52,20 @@
         try:
             # check to see if index exists
             conn.ft(INDEX_NAME).info()
-        except:
+        except Exception:
             schema = (
-                TagField("id"),
-                NumericField("metadata"),
-                VectorField("vector", # Vector Field Name
-                    "HNSW", { # Vector Index Type: FLAT or HNSW
-                        "TYPE": "FLOAT32", # FLOAT32 or FLOAT64
-                        "DIM": vector_dimensions, # Number of Vector Dimensions
-                        "DISTANCE_METRIC": "COSINE", # Vector Search Distance Metric
-                    }
+                TagField("id"),
+                NumericField("metadata"),
+                VectorField(
+                    "vector",  # Vector Field Name
+                    "HNSW",  # Vector Index Type: FLAT or HNSW
+                    {
+                        "TYPE": "FLOAT32",  # FLOAT32 or FLOAT64
+                        "DIM": vector_dimensions,  # Number of Vector Dimensions
+                        "DISTANCE_METRIC": "COSINE",  # Vector Search Distance Metric
+                        "M": self.case_config.index_param()["params"]["M"],
+                        "EF_CONSTRUCTION": self.case_config.index_param()["params"]["efConstruction"],
+                    },
                 ),
             )
 
@@ -69,61 +76,62 @@ class Redis(VectorDB):
 
     @contextmanager
     def init(self) -> None:
-        """ create and destory connections to database.
+        """create and destory connections to database.
 
         Examples:
             >>> with self.init():
             >>>     self.insert_embeddings()
         """
-        self.conn = redis.Redis(host=self.db_config["host"], port=self.db_config["port"], password=self.db_config["password"], db=0)
+        self.conn = redis.Redis(
+            host=self.db_config["host"],
+            port=self.db_config["port"],
+            password=self.db_config["password"],
+            db=0,
+        )
         yield
         self.conn.close()
         self.conn = None
 
-
     def ready_to_search(self) -> bool:
         """Check if the database is ready to search."""
-        pass
-
-
-    def ready_to_load(self) -> bool:
-        pass
 
-    def optimize(self) -> None:
+    def optimize(self, data_size: int | None = None):
         pass
 
-
     def insert_embeddings(
         self,
         embeddings: list[list[float]],
         metadata: list[int],
         **kwargs: Any,
-    ) -> (int, Exception):
+    ) -> tuple[int, Exception]:
         """Insert embeddings into the database.
         Should call self.init() first.
         """
 
-        batch_size = 1000 # Adjust this as needed, but don't make too big
+        batch_size = 1000  # Adjust this as needed, but don't make too big
         try:
             with self.conn.pipeline(transaction=False) as pipe:
                 for i, embedding in enumerate(embeddings):
-                    embedding = np.array(embedding).astype(np.float32)
-                    pipe.hset(metadata[i], mapping = {
-                        "id": str(metadata[i]),
-                        "metadata": metadata[i],
-                        "vector": embedding.tobytes(),
-                    })
+                    ndarr_emb = np.array(embedding).astype(np.float32)
+                    pipe.hset(
+                        metadata[i],
+                        mapping={
+                            "id": str(metadata[i]),
+                            "metadata": metadata[i],
+                            "vector": ndarr_emb.tobytes(),
+                        },
+                    )
                     # Execute the pipe so we don't keep too much in memory at once
                     if i % batch_size == 0:
-                        res = pipe.execute()
+                        _ = pipe.execute()
 
-            res = pipe.execute()
+            _ = pipe.execute()
             result_len = i + 1
         except Exception as e:
             return 0, e
-
+
         return result_len, None
-
+
     def search_embedding(
         self,
         query: list[float],
@@ -131,28 +139,53 @@ class Redis(VectorDB):
         filters: dict | None = None,
         timeout: int | None = None,
         **kwargs: Any,
-    ) -> (list[int]):
+    ) -> list[int]:
         assert self.conn is not None
-
+
         query_vector = np.array(query).astype(np.float32).tobytes()
-        query_obj = Query(f"*=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
+        ef_runtime = self.case_config.search_param()["params"]["ef"]
+        query_obj = (
+            Query(f"*=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
+            .sort_by("score")
+            .return_fields("id", "score")
+            .paging(0, k)
+            .dialect(2)
+        )
         query_params = {"vec": query_vector}
-
+
         if filters:
             # benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
             # gets exact match for id, and range for metadata if they exist in filters
             id_value = filters.get("id")
             metadata_value = filters.get("metadata")
             if id_value and metadata_value:
-                query_obj = Query(f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
+                query_obj = (
+                    Query(
+                        f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} ",
+                        f"@vector $vec EF_RUNTIME {ef_runtime} as score]",
+                    )
+                    .sort_by("score")
+                    .return_fields("id", "score")
+                    .paging(0, k)
+                    .dialect(2)
+                )
             elif id_value:
-                #gets exact match for id
-                query_obj = Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
-            else: #metadata only case, greater than or equal to metadata value
-                query_obj = Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
+                # gets exact match for id
+                query_obj = (
+                    Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
+                    .sort_by("score")
+                    .return_fields("id", "score")
+                    .paging(0, k)
+                    .dialect(2)
+                )
+            else:  # metadata only case, greater than or equal to metadata value
+                query_obj = (
+                    Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
+                    .sort_by("score")
+                    .return_fields("id", "score")
+                    .paging(0, k)
+                    .dialect(2)
+                )
         res = self.conn.ft(INDEX_NAME).search(query_obj, query_params)
         # doc in res of format {'id': '9831', 'payload': None, 'score': '1.19209289551e-07'}
         return [int(doc["id"]) for doc in res.docs]
-
-
-
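
Taken together, the Redis hunks above now feed the HNSW build parameters M and efConstruction from the case config into the RediSearch schema and attach an EF_RUNTIME attribute, taken from search_param()["params"]["ef"], to every KNN query. A minimal sketch of that query pattern with redis-py, assuming a reachable Redis Stack instance and an existing FLOAT32 HNSW index named "index"; the host, dimension, k, and ef values below are illustrative:

    import numpy as np
    import redis
    from redis.commands.search.query import Query

    conn = redis.Redis(host="localhost", port=6379, db=0)  # assumed local Redis Stack

    k = 10
    ef_runtime = 64  # search-time candidate list size, analogous to search_param()["params"]["ef"]
    vec = np.random.rand(768).astype(np.float32).tobytes()  # 768 is an assumed dimension

    # Same query shape the diff builds: KNN with an explicit EF_RUNTIME attribute.
    q = (
        Query(f"*=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
        .sort_by("score")
        .return_fields("id", "score")
        .paging(0, k)
        .dialect(2)
    )
    res = conn.ft("index").search(q, {"vec": vec})
    ids = [int(doc["id"]) for doc in res.docs]

Raising EF_RUNTIME trades query latency for recall at search time, independently of the M and EF_CONSTRUCTION values fixed when the index was built.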
@@ -10,15 +10,14 @@ from .. import DB
 from ..test.config import TestConfig, TestIndexConfig
 
 
-class TestTypedDict(CommonTypedDict):
-    ...
+class TestTypedDict(CommonTypedDict): ...
 
 
 @cli.command()
 @click_parameter_decorators_from_typed_dict(TestTypedDict)
 def Test(**parameters: Unpack[TestTypedDict]):
     run(
-        db=DB.NewClient,
+        db=DB.Test,
         db_config=TestConfig(db_label=parameters["db_label"]),
         db_case_config=TestIndexConfig(),
         **parameters,
@@ -1,6 +1,6 @@
-from pydantic import BaseModel, SecretStr
+from pydantic import BaseModel
 
-from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
+from ..api import DBCaseConfig, DBConfig, MetricType
 
 
 class TestConfig(DBConfig):
@@ -1,6 +1,7 @@
 import logging
+from collections.abc import Generator
 from contextlib import contextmanager
-from typing import Any, Generator, Optional, Tuple
+from typing import Any
 
 from ..api import DBCaseConfig, VectorDB
 
@@ -32,10 +33,7 @@ class Test(VectorDB):
 
         yield
 
-    def ready_to_load(self) -> bool:
-        return True
-
-    def optimize(self) -> None:
+    def optimize(self, data_size: int | None = None):
         pass
 
     def insert_embeddings(
@@ -43,11 +41,10 @@ class Test(VectorDB):
         embeddings: list[list[float]],
         metadata: list[int],
         **kwargs: Any,
-    ) -> Tuple[int, Optional[Exception]]:
+    ) -> tuple[int, Exception | None]:
         """Insert embeddings into the database.
         Should call self.init() first.
         """
-        raise RuntimeError("Not implemented")
         return len(metadata), None
 
     def search_embedding(
@@ -58,5 +55,4 @@ class Test(VectorDB):
         timeout: int | None = None,
         **kwargs: Any,
     ) -> list[int]:
-        raise NotImplementedError
-        return [i for i in range(k)]
+        return list(range(k))
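
The Test-client hunks above also show an interface change that recurs throughout this release: ready_to_load() is removed and optimize() gains an optional data_size argument. A minimal sketch of the resulting client surface, assuming the package's VectorDB-style interface; the class name and method bodies are illustrative stand-ins, not the package's actual code:

    from contextlib import contextmanager
    from typing import Any


    class SketchClient:  # hypothetical stand-in for a VectorDB subclass
        @contextmanager
        def init(self):
            # open and tear down a per-process connection around the yield
            yield

        def optimize(self, data_size: int | None = None):
            # 0.0.21 signature: ready_to_load() is gone, data_size is optional
            pass

        def insert_embeddings(
            self,
            embeddings: list[list[float]],
            metadata: list[int],
            **kwargs: Any,
        ) -> tuple[int, Exception | None]:
            return len(metadata), None

        def search_embedding(
            self,
            query: list[float],
            k: int = 100,
            filters: dict | None = None,
            **kwargs: Any,
        ) -> list[int]:
            return list(range(k))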
@@ -14,7 +14,8 @@ from .. import DB
 
 class WeaviateTypedDict(CommonTypedDict):
     api_key: Annotated[
-        str, click.option("--api-key", type=str, help="Weaviate api key", required=True)
+        str,
+        click.option("--api-key", type=str, help="Weaviate api key", required=True),
     ]
     url: Annotated[
         str,
@@ -34,8 +35,6 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
             api_key=SecretStr(parameters["api_key"]),
             url=SecretStr(parameters["url"]),
         ),
-        db_case_config=WeaviateIndexConfig(
-            ef=256, efConstruction=256, maxConnections=16
-        ),
+        db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
         **parameters,
     )
@@ -1,6 +1,6 @@
 from pydantic import BaseModel, SecretStr
 
-from ..api import DBConfig, DBCaseConfig, MetricType
+from ..api import DBCaseConfig, DBConfig, MetricType
 
 
 class WeaviateConfig(DBConfig):
@@ -23,7 +23,7 @@ class WeaviateIndexConfig(BaseModel, DBCaseConfig):
     def parse_metric(self) -> str:
         if self.metric_type == MetricType.L2:
             return "l2-squared"
-        elif self.metric_type == MetricType.IP:
+        if self.metric_type == MetricType.IP:
             return "dot"
         return "cosine"
 
@@ -1,13 +1,13 @@
 """Wrapper around the Weaviate vector database over VectorDB"""
 
 import logging
-from typing import Iterable
+from collections.abc import Iterable
 from contextlib import contextmanager
 
 import weaviate
 from weaviate.exceptions import WeaviateBaseError
 
-from ..api import VectorDB, DBCaseConfig
+from ..api import DBCaseConfig, VectorDB
 
 log = logging.getLogger(__name__)
 
@@ -23,7 +23,13 @@ class WeaviateCloud(VectorDB):
         **kwargs,
     ):
         """Initialize wrapper around the weaviate vector database."""
-        db_config.update({"auth_client_secret": weaviate.AuthApiKey(api_key=db_config.get("auth_client_secret"))})
+        db_config.update(
+            {
+                "auth_client_secret": weaviate.AuthApiKey(
+                    api_key=db_config.get("auth_client_secret"),
+                ),
+            },
+        )
         self.db_config = db_config
         self.case_config = db_case_config
         self.collection_name = collection_name
@@ -33,6 +39,7 @@ class WeaviateCloud(VectorDB):
         self._index_name = "vector_idx"
 
         from weaviate import Client
+
         client = Client(**db_config)
         if drop_old:
             try:
@@ -40,7 +47,7 @@ class WeaviateCloud(VectorDB):
                 log.info(f"weaviate client drop_old collection: {self.collection_name}")
                 client.schema.delete_class(self.collection_name)
             except WeaviateBaseError as e:
-                log.warning(f"Failed to drop collection: {self.collection_name} error: {str(e)}")
+                log.warning(f"Failed to drop collection: {self.collection_name} error: {e!s}")
                 raise e from None
         self._create_collection(client)
         client = None
@@ -54,20 +61,20 @@ class WeaviateCloud(VectorDB):
             >>> self.search_embedding()
         """
         from weaviate import Client
+
         self.client = Client(**self.db_config)
         yield
         self.client = None
-        del(self.client)
-
-    def ready_to_load(self):
-        """Should call insert first, do nothing"""
-        pass
+        del self.client
 
-    def optimize(self):
+    def optimize(self, data_size: int | None = None):
         assert self.client.schema.exists(self.collection_name)
-        self.client.schema.update_config(self.collection_name, {"vectorIndexConfig": self.case_config.search_param() } )
+        self.client.schema.update_config(
+            self.collection_name,
+            {"vectorIndexConfig": self.case_config.search_param()},
+        )
 
-    def _create_collection(self, client):
+    def _create_collection(self, client: weaviate.Client) -> None:
         if not client.schema.exists(self.collection_name):
             log.info(f"Create collection: {self.collection_name}")
             class_obj = {
@@ -78,13 +85,13 @@ class WeaviateCloud(VectorDB):
                         "dataType": ["int"],
                         "name": self._scalar_field,
                     },
-                ]
+                ],
             }
             class_obj["vectorIndexConfig"] = self.case_config.index_param()
             try:
                 client.schema.create_class(class_obj)
             except WeaviateBaseError as e:
-                log.warning(f"Failed to create collection: {self.collection_name} error: {str(e)}")
+                log.warning(f"Failed to create collection: {self.collection_name} error: {e!s}")
                 raise e from None
 
     def insert_embeddings(
@@ -102,15 +109,17 @@ class WeaviateCloud(VectorDB):
                 batch.dynamic = True
                 res = []
                 for i in range(len(metadata)):
-                    res.append(batch.add_data_object(
-                        {self._scalar_field: metadata[i]},
-                        class_name=self.collection_name,
-                        vector=embeddings[i]
-                    ))
+                    res.append(
+                        batch.add_data_object(
+                            {self._scalar_field: metadata[i]},
+                            class_name=self.collection_name,
+                            vector=embeddings[i],
+                        ),
+                    )
                     insert_count += 1
             return (len(res), None)
         except WeaviateBaseError as e:
-            log.warning(f"Failed to insert data, error: {str(e)}")
+            log.warning(f"Failed to insert data, error: {e!s}")
             return (insert_count, e)
 
     def search_embedding(
@@ -125,12 +134,17 @@ class WeaviateCloud(VectorDB):
         """
         assert self.client.schema.exists(self.collection_name)
 
-        query_obj = self.client.query.get(self.collection_name, [self._scalar_field]).with_additional("distance").with_near_vector({"vector": query}).with_limit(k)
+        query_obj = (
+            self.client.query.get(self.collection_name, [self._scalar_field])
+            .with_additional("distance")
+            .with_near_vector({"vector": query})
+            .with_limit(k)
+        )
         if filters:
             where_filter = {
                 "path": "key",
                 "operator": "GreaterThanEqual",
-                "valueInt": filters.get('id')
+                "valueInt": filters.get("id"),
             }
             query_obj = query_obj.with_where(where_filter)
 
@@ -138,7 +152,4 @@ class WeaviateCloud(VectorDB):
         res = query_obj.do()
 
         # Organize results.
-        ret = [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
-
-        return ret
-
+        return [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
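
In the WeaviateCloud hunks above, optimize() picks up the same optional data_size parameter and now pushes the case config's search parameters into the existing class through schema.update_config. A minimal sketch of that call with the v3 weaviate-client API the diff itself uses; the endpoint, class name, and ef value are illustrative:

    import weaviate

    client = weaviate.Client("https://example.weaviate.network")  # assumed endpoint

    collection_name = "VdbBenchCollection"  # hypothetical class name
    if client.schema.exists(collection_name):
        # Mirrors the diff's optimize(): update only the vector index config of an existing class.
        client.schema.update_config(
            collection_name,
            {"vectorIndexConfig": {"ef": 256}},  # illustrative search-time ef
        )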
@@ -1,33 +1,36 @@
+import os
 from typing import Annotated, Unpack
 
 import click
-import os
 from pydantic import SecretStr
 
+from vectordb_bench.backend.clients import DB
 from vectordb_bench.cli.cli import (
     CommonTypedDict,
     cli,
     click_parameter_decorators_from_typed_dict,
     run,
 )
-from vectordb_bench.backend.clients import DB
 
 
 class ZillizTypedDict(CommonTypedDict):
     uri: Annotated[
-        str, click.option("--uri", type=str, help="uri connection string", required=True)
+        str,
+        click.option("--uri", type=str, help="uri connection string", required=True),
     ]
     user_name: Annotated[
-        str, click.option("--user-name", type=str, help="Db username", required=True)
+        str,
+        click.option("--user-name", type=str, help="Db username", required=True),
     ]
     password: Annotated[
         str,
-        click.option("--password",
-                     type=str,
-                     help="Zilliz password",
-                     default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
-                     show_default="$ZILLIZ_PASSWORD",
-                     ),
+        click.option(
+            "--password",
+            type=str,
+            help="Zilliz password",
+            default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
+            show_default="$ZILLIZ_PASSWORD",
+        ),
     ]
     level: Annotated[
         str,
@@ -38,7 +41,7 @@ class ZillizTypedDict(CommonTypedDict):
 @cli.command()
 @click_parameter_decorators_from_typed_dict(ZillizTypedDict)
 def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
-    from .config import ZillizCloudConfig, AutoIndexConfig
+    from .config import AutoIndexConfig, ZillizCloudConfig
 
     run(
         db=DB.ZillizCloud,
@@ -1,7 +1,7 @@
 from pydantic import SecretStr
 
 from ..api import DBCaseConfig, DBConfig
-from ..milvus.config import MilvusIndexConfig, IndexType
+from ..milvus.config import IndexType, MilvusIndexConfig
 
 
 class ZillizCloudConfig(DBConfig):
@@ -33,7 +33,5 @@ class AutoIndexConfig(MilvusIndexConfig, DBCaseConfig):
             "metric_type": self.parse_metric(),
             "params": {
                 "level": self.level,
-            }
+            },
         }
-
-
@@ -1,7 +1,7 @@
 """Wrapper around the ZillizCloud vector database over VectorDB"""
 
-from ..milvus.milvus import Milvus
 from ..api import DBCaseConfig
+from ..milvus.milvus import Milvus
 
 
 class ZillizCloud(Milvus):