vectordb-bench 0.0.18__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. vectordb_bench/__init__.py +49 -24
  2. vectordb_bench/__main__.py +4 -3
  3. vectordb_bench/backend/assembler.py +12 -13
  4. vectordb_bench/backend/cases.py +56 -46
  5. vectordb_bench/backend/clients/__init__.py +101 -14
  6. vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +26 -0
  7. vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +18 -0
  8. vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +345 -0
  9. vectordb_bench/backend/clients/aliyun_opensearch/config.py +47 -0
  10. vectordb_bench/backend/clients/alloydb/alloydb.py +58 -80
  11. vectordb_bench/backend/clients/alloydb/cli.py +52 -35
  12. vectordb_bench/backend/clients/alloydb/config.py +30 -30
  13. vectordb_bench/backend/clients/api.py +8 -9
  14. vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +46 -47
  15. vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
  16. vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
  17. vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
  18. vectordb_bench/backend/clients/chroma/chroma.py +38 -36
  19. vectordb_bench/backend/clients/chroma/config.py +4 -2
  20. vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
  21. vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +23 -22
  22. vectordb_bench/backend/clients/memorydb/cli.py +8 -8
  23. vectordb_bench/backend/clients/memorydb/config.py +2 -2
  24. vectordb_bench/backend/clients/memorydb/memorydb.py +65 -53
  25. vectordb_bench/backend/clients/milvus/cli.py +62 -80
  26. vectordb_bench/backend/clients/milvus/config.py +31 -7
  27. vectordb_bench/backend/clients/milvus/milvus.py +23 -26
  28. vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
  29. vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
  30. vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +55 -73
  31. vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
  32. vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
  33. vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +33 -34
  34. vectordb_bench/backend/clients/pgvector/cli.py +40 -31
  35. vectordb_bench/backend/clients/pgvector/config.py +63 -73
  36. vectordb_bench/backend/clients/pgvector/pgvector.py +97 -98
  37. vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
  38. vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
  39. vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +38 -43
  40. vectordb_bench/backend/clients/pinecone/config.py +1 -0
  41. vectordb_bench/backend/clients/pinecone/pinecone.py +14 -21
  42. vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
  43. vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +40 -31
  44. vectordb_bench/backend/clients/redis/cli.py +6 -12
  45. vectordb_bench/backend/clients/redis/config.py +7 -5
  46. vectordb_bench/backend/clients/redis/redis.py +94 -58
  47. vectordb_bench/backend/clients/test/cli.py +1 -2
  48. vectordb_bench/backend/clients/test/config.py +2 -2
  49. vectordb_bench/backend/clients/test/test.py +4 -5
  50. vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
  51. vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
  52. vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +36 -22
  53. vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
  54. vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
  55. vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
  56. vectordb_bench/backend/data_source.py +30 -18
  57. vectordb_bench/backend/dataset.py +47 -27
  58. vectordb_bench/backend/result_collector.py +2 -3
  59. vectordb_bench/backend/runner/__init__.py +4 -6
  60. vectordb_bench/backend/runner/mp_runner.py +85 -34
  61. vectordb_bench/backend/runner/rate_runner.py +51 -23
  62. vectordb_bench/backend/runner/read_write_runner.py +140 -46
  63. vectordb_bench/backend/runner/serial_runner.py +99 -50
  64. vectordb_bench/backend/runner/util.py +4 -19
  65. vectordb_bench/backend/task_runner.py +95 -74
  66. vectordb_bench/backend/utils.py +17 -9
  67. vectordb_bench/base.py +0 -1
  68. vectordb_bench/cli/cli.py +65 -60
  69. vectordb_bench/cli/vectordbbench.py +6 -7
  70. vectordb_bench/frontend/components/check_results/charts.py +8 -19
  71. vectordb_bench/frontend/components/check_results/data.py +4 -16
  72. vectordb_bench/frontend/components/check_results/filters.py +8 -16
  73. vectordb_bench/frontend/components/check_results/nav.py +4 -4
  74. vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
  75. vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
  76. vectordb_bench/frontend/components/concurrent/charts.py +12 -12
  77. vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
  78. vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
  79. vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
  80. vectordb_bench/frontend/components/custom/initStyle.py +1 -1
  81. vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
  82. vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
  83. vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
  84. vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
  85. vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
  86. vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
  87. vectordb_bench/frontend/components/tables/data.py +3 -6
  88. vectordb_bench/frontend/config/dbCaseConfigs.py +108 -83
  89. vectordb_bench/frontend/pages/concurrent.py +3 -5
  90. vectordb_bench/frontend/pages/custom.py +30 -9
  91. vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
  92. vectordb_bench/frontend/pages/run_test.py +3 -7
  93. vectordb_bench/frontend/utils.py +1 -1
  94. vectordb_bench/frontend/vdb_benchmark.py +4 -6
  95. vectordb_bench/interface.py +56 -26
  96. vectordb_bench/log_util.py +59 -64
  97. vectordb_bench/metric.py +10 -11
  98. vectordb_bench/models.py +26 -43
  99. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/METADATA +34 -42
  100. vectordb_bench-0.0.20.dist-info/RECORD +135 -0
  101. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/WHEEL +1 -1
  102. vectordb_bench-0.0.18.dist-info/RECORD +0 -131
  103. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/LICENSE +0 -0
  104. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/entry_points.txt +0 -0
  105. {vectordb_bench-0.0.18.dist-info → vectordb_bench-0.0.20.dist-info}/top_level.txt +0 -0
@@ -1,37 +1,40 @@
1
1
  import logging
2
2
  from contextlib import contextmanager
3
- from typing import Any, Type
4
- from ..api import VectorDB, DBConfig, DBCaseConfig, EmptyDBCaseConfig, IndexType
5
- from .config import RedisConfig
3
+ from typing import Any
4
+
5
+ import numpy as np
6
6
  import redis
7
- from redis.commands.search.field import TagField, VectorField, NumericField
7
+ from redis.commands.search.field import NumericField, TagField, VectorField
8
8
  from redis.commands.search.indexDefinition import IndexDefinition, IndexType
9
9
  from redis.commands.search.query import Query
10
- import numpy as np
11
10
 
11
+ from ..api import DBCaseConfig, VectorDB
12
12
 
13
13
  log = logging.getLogger(__name__)
14
- INDEX_NAME = "index" # Vector Index Name
14
+ INDEX_NAME = "index" # Vector Index Name
15
+
15
16
 
16
17
  class Redis(VectorDB):
17
18
  def __init__(
18
- self,
19
- dim: int,
20
- db_config: dict,
21
- db_case_config: DBCaseConfig,
22
- drop_old: bool = False,
23
-
24
- **kwargs
25
- ):
26
-
19
+ self,
20
+ dim: int,
21
+ db_config: dict,
22
+ db_case_config: DBCaseConfig,
23
+ drop_old: bool = False,
24
+ **kwargs,
25
+ ):
27
26
  self.db_config = db_config
28
27
  self.case_config = db_case_config
29
28
  self.collection_name = INDEX_NAME
30
29
 
31
30
  # Create a redis connection, if db has password configured, add it to the connection here and in init():
32
- password=self.db_config["password"]
33
- conn = redis.Redis(host=self.db_config["host"], port=self.db_config["port"], password=password, db=0)
34
-
31
+ password = self.db_config["password"]
32
+ conn = redis.Redis(
33
+ host=self.db_config["host"],
34
+ port=self.db_config["port"],
35
+ password=password,
36
+ db=0,
37
+ )
35
38
 
36
39
  if drop_old:
37
40
  try:
@@ -40,7 +43,7 @@ class Redis(VectorDB):
40
43
  except redis.exceptions.ResponseError:
41
44
  drop_old = False
42
45
  log.info(f"Redis client drop_old collection: {self.collection_name}")
43
-
46
+
44
47
  self.make_index(dim, conn)
45
48
  conn.close()
46
49
  conn = None
@@ -49,16 +52,20 @@ class Redis(VectorDB):
49
52
  try:
50
53
  # check to see if index exists
51
54
  conn.ft(INDEX_NAME).info()
52
- except:
55
+ except Exception:
53
56
  schema = (
54
- TagField("id"),
55
- NumericField("metadata"),
56
- VectorField("vector", # Vector Field Name
57
- "HNSW", { # Vector Index Type: FLAT or HNSW
58
- "TYPE": "FLOAT32", # FLOAT32 or FLOAT64
59
- "DIM": vector_dimensions, # Number of Vector Dimensions
60
- "DISTANCE_METRIC": "COSINE", # Vector Search Distance Metric
61
- }
57
+ TagField("id"),
58
+ NumericField("metadata"),
59
+ VectorField(
60
+ "vector", # Vector Field Name
61
+ "HNSW", # Vector Index Type: FLAT or HNSW
62
+ {
63
+ "TYPE": "FLOAT32", # FLOAT32 or FLOAT64
64
+ "DIM": vector_dimensions, # Number of Vector Dimensions
65
+ "DISTANCE_METRIC": "COSINE", # Vector Search Distance Metric
66
+ "M": self.case_config.index_param()["params"]["M"],
67
+ "EF_CONSTRUCTION": self.case_config.index_param()["params"]["efConstruction"],
68
+ },
62
69
  ),
63
70
  )
64
71
 
@@ -69,22 +76,24 @@ class Redis(VectorDB):
69
76
 
70
77
  @contextmanager
71
78
  def init(self) -> None:
72
- """ create and destory connections to database.
79
+ """create and destory connections to database.
73
80
 
74
81
  Examples:
75
82
  >>> with self.init():
76
83
  >>> self.insert_embeddings()
77
84
  """
78
- self.conn = redis.Redis(host=self.db_config["host"], port=self.db_config["port"], password=self.db_config["password"], db=0)
85
+ self.conn = redis.Redis(
86
+ host=self.db_config["host"],
87
+ port=self.db_config["port"],
88
+ password=self.db_config["password"],
89
+ db=0,
90
+ )
79
91
  yield
80
92
  self.conn.close()
81
93
  self.conn = None
82
94
 
83
-
84
95
  def ready_to_search(self) -> bool:
85
96
  """Check if the database is ready to search."""
86
- pass
87
-
88
97
 
89
98
  def ready_to_load(self) -> bool:
90
99
  pass
@@ -92,38 +101,40 @@ class Redis(VectorDB):
92
101
  def optimize(self) -> None:
93
102
  pass
94
103
 
95
-
96
104
  def insert_embeddings(
97
105
  self,
98
106
  embeddings: list[list[float]],
99
107
  metadata: list[int],
100
108
  **kwargs: Any,
101
- ) -> (int, Exception):
109
+ ) -> tuple[int, Exception]:
102
110
  """Insert embeddings into the database.
103
111
  Should call self.init() first.
104
112
  """
105
113
 
106
- batch_size = 1000 # Adjust this as needed, but don't make too big
114
+ batch_size = 1000 # Adjust this as needed, but don't make too big
107
115
  try:
108
116
  with self.conn.pipeline(transaction=False) as pipe:
109
117
  for i, embedding in enumerate(embeddings):
110
- embedding = np.array(embedding).astype(np.float32)
111
- pipe.hset(metadata[i], mapping = {
112
- "id": str(metadata[i]),
113
- "metadata": metadata[i],
114
- "vector": embedding.tobytes(),
115
- })
118
+ ndarr_emb = np.array(embedding).astype(np.float32)
119
+ pipe.hset(
120
+ metadata[i],
121
+ mapping={
122
+ "id": str(metadata[i]),
123
+ "metadata": metadata[i],
124
+ "vector": ndarr_emb.tobytes(),
125
+ },
126
+ )
116
127
  # Execute the pipe so we don't keep too much in memory at once
117
128
  if i % batch_size == 0:
118
- res = pipe.execute()
129
+ _ = pipe.execute()
119
130
 
120
- res = pipe.execute()
131
+ _ = pipe.execute()
121
132
  result_len = i + 1
122
133
  except Exception as e:
123
134
  return 0, e
124
-
135
+
125
136
  return result_len, None
126
-
137
+
127
138
  def search_embedding(
128
139
  self,
129
140
  query: list[float],
@@ -131,28 +142,53 @@ class Redis(VectorDB):
131
142
  filters: dict | None = None,
132
143
  timeout: int | None = None,
133
144
  **kwargs: Any,
134
- ) -> (list[int]):
145
+ ) -> list[int]:
135
146
  assert self.conn is not None
136
-
147
+
137
148
  query_vector = np.array(query).astype(np.float32).tobytes()
138
- query_obj = Query(f"*=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
149
+ ef_runtime = self.case_config.search_param()["params"]["ef"]
150
+ query_obj = (
151
+ Query(f"*=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
152
+ .sort_by("score")
153
+ .return_fields("id", "score")
154
+ .paging(0, k)
155
+ .dialect(2)
156
+ )
139
157
  query_params = {"vec": query_vector}
140
-
158
+
141
159
  if filters:
142
160
  # benchmark test filters of format: {'metadata': '>=10000', 'id': 10000}
143
161
  # gets exact match for id, and range for metadata if they exist in filters
144
162
  id_value = filters.get("id")
145
163
  metadata_value = filters.get("metadata")
146
164
  if id_value and metadata_value:
147
- query_obj = Query(f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
165
+ query_obj = (
166
+ Query(
167
+ f"(@metadata:[{metadata_value} +inf] @id:{ {id_value} })=>[KNN {k} ",
168
+ f"@vector $vec EF_RUNTIME {ef_runtime} as score]",
169
+ )
170
+ .sort_by("score")
171
+ .return_fields("id", "score")
172
+ .paging(0, k)
173
+ .dialect(2)
174
+ )
148
175
  elif id_value:
149
- #gets exact match for id
150
- query_obj = Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
151
- else: #metadata only case, greater than or equal to metadata value
152
- query_obj = Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec as score]").sort_by("score").return_fields("id", "score").paging(0, k).dialect(2)
176
+ # gets exact match for id
177
+ query_obj = (
178
+ Query(f"@id:{ {id_value} }=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
179
+ .sort_by("score")
180
+ .return_fields("id", "score")
181
+ .paging(0, k)
182
+ .dialect(2)
183
+ )
184
+ else: # metadata only case, greater than or equal to metadata value
185
+ query_obj = (
186
+ Query(f"@metadata:[{metadata_value} +inf]=>[KNN {k} @vector $vec EF_RUNTIME {ef_runtime} as score]")
187
+ .sort_by("score")
188
+ .return_fields("id", "score")
189
+ .paging(0, k)
190
+ .dialect(2)
191
+ )
153
192
  res = self.conn.ft(INDEX_NAME).search(query_obj, query_params)
154
193
  # doc in res of format {'id': '9831', 'payload': None, 'score': '1.19209289551e-07'}
155
194
  return [int(doc["id"]) for doc in res.docs]
156
-
157
-
158
-
@@ -10,8 +10,7 @@ from .. import DB
10
10
  from ..test.config import TestConfig, TestIndexConfig
11
11
 
12
12
 
13
- class TestTypedDict(CommonTypedDict):
14
- ...
13
+ class TestTypedDict(CommonTypedDict): ...
15
14
 
16
15
 
17
16
  @cli.command()
@@ -1,6 +1,6 @@
1
- from pydantic import BaseModel, SecretStr
1
+ from pydantic import BaseModel
2
2
 
3
- from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
3
+ from ..api import DBCaseConfig, DBConfig, MetricType
4
4
 
5
5
 
6
6
  class TestConfig(DBConfig):
@@ -1,6 +1,7 @@
1
1
  import logging
2
+ from collections.abc import Generator
2
3
  from contextlib import contextmanager
3
- from typing import Any, Generator, Optional, Tuple
4
+ from typing import Any
4
5
 
5
6
  from ..api import DBCaseConfig, VectorDB
6
7
 
@@ -43,11 +44,10 @@ class Test(VectorDB):
43
44
  embeddings: list[list[float]],
44
45
  metadata: list[int],
45
46
  **kwargs: Any,
46
- ) -> Tuple[int, Optional[Exception]]:
47
+ ) -> tuple[int, Exception | None]:
47
48
  """Insert embeddings into the database.
48
49
  Should call self.init() first.
49
50
  """
50
- raise RuntimeError("Not implemented")
51
51
  return len(metadata), None
52
52
 
53
53
  def search_embedding(
@@ -58,5 +58,4 @@ class Test(VectorDB):
58
58
  timeout: int | None = None,
59
59
  **kwargs: Any,
60
60
  ) -> list[int]:
61
- raise NotImplementedError
62
- return [i for i in range(k)]
61
+ return list(range(k))
@@ -14,7 +14,8 @@ from .. import DB
14
14
 
15
15
  class WeaviateTypedDict(CommonTypedDict):
16
16
  api_key: Annotated[
17
- str, click.option("--api-key", type=str, help="Weaviate api key", required=True)
17
+ str,
18
+ click.option("--api-key", type=str, help="Weaviate api key", required=True),
18
19
  ]
19
20
  url: Annotated[
20
21
  str,
@@ -34,8 +35,6 @@ def Weaviate(**parameters: Unpack[WeaviateTypedDict]):
34
35
  api_key=SecretStr(parameters["api_key"]),
35
36
  url=SecretStr(parameters["url"]),
36
37
  ),
37
- db_case_config=WeaviateIndexConfig(
38
- ef=256, efConstruction=256, maxConnections=16
39
- ),
38
+ db_case_config=WeaviateIndexConfig(ef=256, efConstruction=256, maxConnections=16),
40
39
  **parameters,
41
40
  )
@@ -1,6 +1,6 @@
1
1
  from pydantic import BaseModel, SecretStr
2
2
 
3
- from ..api import DBConfig, DBCaseConfig, MetricType
3
+ from ..api import DBCaseConfig, DBConfig, MetricType
4
4
 
5
5
 
6
6
  class WeaviateConfig(DBConfig):
@@ -23,7 +23,7 @@ class WeaviateIndexConfig(BaseModel, DBCaseConfig):
23
23
  def parse_metric(self) -> str:
24
24
  if self.metric_type == MetricType.L2:
25
25
  return "l2-squared"
26
- elif self.metric_type == MetricType.IP:
26
+ if self.metric_type == MetricType.IP:
27
27
  return "dot"
28
28
  return "cosine"
29
29
 
@@ -1,13 +1,13 @@
1
1
  """Wrapper around the Weaviate vector database over VectorDB"""
2
2
 
3
3
  import logging
4
- from typing import Iterable
4
+ from collections.abc import Iterable
5
5
  from contextlib import contextmanager
6
6
 
7
7
  import weaviate
8
8
  from weaviate.exceptions import WeaviateBaseError
9
9
 
10
- from ..api import VectorDB, DBCaseConfig
10
+ from ..api import DBCaseConfig, VectorDB
11
11
 
12
12
  log = logging.getLogger(__name__)
13
13
 
@@ -23,7 +23,13 @@ class WeaviateCloud(VectorDB):
23
23
  **kwargs,
24
24
  ):
25
25
  """Initialize wrapper around the weaviate vector database."""
26
- db_config.update({"auth_client_secret": weaviate.AuthApiKey(api_key=db_config.get("auth_client_secret"))})
26
+ db_config.update(
27
+ {
28
+ "auth_client_secret": weaviate.AuthApiKey(
29
+ api_key=db_config.get("auth_client_secret"),
30
+ ),
31
+ },
32
+ )
27
33
  self.db_config = db_config
28
34
  self.case_config = db_case_config
29
35
  self.collection_name = collection_name
@@ -33,6 +39,7 @@ class WeaviateCloud(VectorDB):
33
39
  self._index_name = "vector_idx"
34
40
 
35
41
  from weaviate import Client
42
+
36
43
  client = Client(**db_config)
37
44
  if drop_old:
38
45
  try:
@@ -40,7 +47,7 @@ class WeaviateCloud(VectorDB):
40
47
  log.info(f"weaviate client drop_old collection: {self.collection_name}")
41
48
  client.schema.delete_class(self.collection_name)
42
49
  except WeaviateBaseError as e:
43
- log.warning(f"Failed to drop collection: {self.collection_name} error: {str(e)}")
50
+ log.warning(f"Failed to drop collection: {self.collection_name} error: {e!s}")
44
51
  raise e from None
45
52
  self._create_collection(client)
46
53
  client = None
@@ -54,20 +61,23 @@ class WeaviateCloud(VectorDB):
54
61
  >>> self.search_embedding()
55
62
  """
56
63
  from weaviate import Client
64
+
57
65
  self.client = Client(**self.db_config)
58
66
  yield
59
67
  self.client = None
60
- del(self.client)
68
+ del self.client
61
69
 
62
70
  def ready_to_load(self):
63
71
  """Should call insert first, do nothing"""
64
- pass
65
72
 
66
73
  def optimize(self):
67
74
  assert self.client.schema.exists(self.collection_name)
68
- self.client.schema.update_config(self.collection_name, {"vectorIndexConfig": self.case_config.search_param() } )
75
+ self.client.schema.update_config(
76
+ self.collection_name,
77
+ {"vectorIndexConfig": self.case_config.search_param()},
78
+ )
69
79
 
70
- def _create_collection(self, client):
80
+ def _create_collection(self, client: weaviate.Client) -> None:
71
81
  if not client.schema.exists(self.collection_name):
72
82
  log.info(f"Create collection: {self.collection_name}")
73
83
  class_obj = {
@@ -78,13 +88,13 @@ class WeaviateCloud(VectorDB):
78
88
  "dataType": ["int"],
79
89
  "name": self._scalar_field,
80
90
  },
81
- ]
91
+ ],
82
92
  }
83
93
  class_obj["vectorIndexConfig"] = self.case_config.index_param()
84
94
  try:
85
95
  client.schema.create_class(class_obj)
86
96
  except WeaviateBaseError as e:
87
- log.warning(f"Failed to create collection: {self.collection_name} error: {str(e)}")
97
+ log.warning(f"Failed to create collection: {self.collection_name} error: {e!s}")
88
98
  raise e from None
89
99
 
90
100
  def insert_embeddings(
@@ -102,15 +112,17 @@ class WeaviateCloud(VectorDB):
102
112
  batch.dynamic = True
103
113
  res = []
104
114
  for i in range(len(metadata)):
105
- res.append(batch.add_data_object(
106
- {self._scalar_field: metadata[i]},
107
- class_name=self.collection_name,
108
- vector=embeddings[i]
109
- ))
115
+ res.append(
116
+ batch.add_data_object(
117
+ {self._scalar_field: metadata[i]},
118
+ class_name=self.collection_name,
119
+ vector=embeddings[i],
120
+ ),
121
+ )
110
122
  insert_count += 1
111
123
  return (len(res), None)
112
124
  except WeaviateBaseError as e:
113
- log.warning(f"Failed to insert data, error: {str(e)}")
125
+ log.warning(f"Failed to insert data, error: {e!s}")
114
126
  return (insert_count, e)
115
127
 
116
128
  def search_embedding(
@@ -125,12 +137,17 @@ class WeaviateCloud(VectorDB):
125
137
  """
126
138
  assert self.client.schema.exists(self.collection_name)
127
139
 
128
- query_obj = self.client.query.get(self.collection_name, [self._scalar_field]).with_additional("distance").with_near_vector({"vector": query}).with_limit(k)
140
+ query_obj = (
141
+ self.client.query.get(self.collection_name, [self._scalar_field])
142
+ .with_additional("distance")
143
+ .with_near_vector({"vector": query})
144
+ .with_limit(k)
145
+ )
129
146
  if filters:
130
147
  where_filter = {
131
148
  "path": "key",
132
149
  "operator": "GreaterThanEqual",
133
- "valueInt": filters.get('id')
150
+ "valueInt": filters.get("id"),
134
151
  }
135
152
  query_obj = query_obj.with_where(where_filter)
136
153
 
@@ -138,7 +155,4 @@ class WeaviateCloud(VectorDB):
138
155
  res = query_obj.do()
139
156
 
140
157
  # Organize results.
141
- ret = [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
142
-
143
- return ret
144
-
158
+ return [result[self._scalar_field] for result in res["data"]["Get"][self.collection_name]]
@@ -1,33 +1,36 @@
1
+ import os
1
2
  from typing import Annotated, Unpack
2
3
 
3
4
  import click
4
- import os
5
5
  from pydantic import SecretStr
6
6
 
7
+ from vectordb_bench.backend.clients import DB
7
8
  from vectordb_bench.cli.cli import (
8
9
  CommonTypedDict,
9
10
  cli,
10
11
  click_parameter_decorators_from_typed_dict,
11
12
  run,
12
13
  )
13
- from vectordb_bench.backend.clients import DB
14
14
 
15
15
 
16
16
  class ZillizTypedDict(CommonTypedDict):
17
17
  uri: Annotated[
18
- str, click.option("--uri", type=str, help="uri connection string", required=True)
18
+ str,
19
+ click.option("--uri", type=str, help="uri connection string", required=True),
19
20
  ]
20
21
  user_name: Annotated[
21
- str, click.option("--user-name", type=str, help="Db username", required=True)
22
+ str,
23
+ click.option("--user-name", type=str, help="Db username", required=True),
22
24
  ]
23
25
  password: Annotated[
24
26
  str,
25
- click.option("--password",
26
- type=str,
27
- help="Zilliz password",
28
- default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
29
- show_default="$ZILLIZ_PASSWORD",
30
- ),
27
+ click.option(
28
+ "--password",
29
+ type=str,
30
+ help="Zilliz password",
31
+ default=lambda: os.environ.get("ZILLIZ_PASSWORD", ""),
32
+ show_default="$ZILLIZ_PASSWORD",
33
+ ),
31
34
  ]
32
35
  level: Annotated[
33
36
  str,
@@ -38,7 +41,7 @@ class ZillizTypedDict(CommonTypedDict):
38
41
  @cli.command()
39
42
  @click_parameter_decorators_from_typed_dict(ZillizTypedDict)
40
43
  def ZillizAutoIndex(**parameters: Unpack[ZillizTypedDict]):
41
- from .config import ZillizCloudConfig, AutoIndexConfig
44
+ from .config import AutoIndexConfig, ZillizCloudConfig
42
45
 
43
46
  run(
44
47
  db=DB.ZillizCloud,
@@ -1,7 +1,7 @@
1
1
  from pydantic import SecretStr
2
2
 
3
3
  from ..api import DBCaseConfig, DBConfig
4
- from ..milvus.config import MilvusIndexConfig, IndexType
4
+ from ..milvus.config import IndexType, MilvusIndexConfig
5
5
 
6
6
 
7
7
  class ZillizCloudConfig(DBConfig):
@@ -33,7 +33,5 @@ class AutoIndexConfig(MilvusIndexConfig, DBCaseConfig):
33
33
  "metric_type": self.parse_metric(),
34
34
  "params": {
35
35
  "level": self.level,
36
- }
36
+ },
37
37
  }
38
-
39
-
@@ -1,7 +1,7 @@
1
1
  """Wrapper around the ZillizCloud vector database over VectorDB"""
2
2
 
3
- from ..milvus.milvus import Milvus
4
3
  from ..api import DBCaseConfig
4
+ from ..milvus.milvus import Milvus
5
5
 
6
6
 
7
7
  class ZillizCloud(Milvus):