vectordb-bench 0.0.19__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectordb_bench/__init__.py +49 -24
- vectordb_bench/__main__.py +4 -3
- vectordb_bench/backend/assembler.py +12 -13
- vectordb_bench/backend/cases.py +55 -45
- vectordb_bench/backend/clients/__init__.py +85 -14
- vectordb_bench/backend/clients/aliyun_elasticsearch/aliyun_elasticsearch.py +1 -2
- vectordb_bench/backend/clients/aliyun_elasticsearch/config.py +3 -4
- vectordb_bench/backend/clients/aliyun_opensearch/aliyun_opensearch.py +112 -77
- vectordb_bench/backend/clients/aliyun_opensearch/config.py +6 -7
- vectordb_bench/backend/clients/alloydb/alloydb.py +59 -84
- vectordb_bench/backend/clients/alloydb/cli.py +51 -34
- vectordb_bench/backend/clients/alloydb/config.py +30 -30
- vectordb_bench/backend/clients/api.py +13 -24
- vectordb_bench/backend/clients/aws_opensearch/aws_opensearch.py +50 -54
- vectordb_bench/backend/clients/aws_opensearch/cli.py +4 -7
- vectordb_bench/backend/clients/aws_opensearch/config.py +13 -9
- vectordb_bench/backend/clients/aws_opensearch/run.py +69 -59
- vectordb_bench/backend/clients/chroma/chroma.py +39 -40
- vectordb_bench/backend/clients/chroma/config.py +4 -2
- vectordb_bench/backend/clients/elastic_cloud/config.py +5 -5
- vectordb_bench/backend/clients/elastic_cloud/elastic_cloud.py +24 -26
- vectordb_bench/backend/clients/memorydb/cli.py +8 -8
- vectordb_bench/backend/clients/memorydb/config.py +2 -2
- vectordb_bench/backend/clients/memorydb/memorydb.py +67 -58
- vectordb_bench/backend/clients/milvus/cli.py +41 -83
- vectordb_bench/backend/clients/milvus/config.py +18 -8
- vectordb_bench/backend/clients/milvus/milvus.py +19 -39
- vectordb_bench/backend/clients/pgdiskann/cli.py +29 -22
- vectordb_bench/backend/clients/pgdiskann/config.py +29 -26
- vectordb_bench/backend/clients/pgdiskann/pgdiskann.py +56 -77
- vectordb_bench/backend/clients/pgvecto_rs/cli.py +9 -11
- vectordb_bench/backend/clients/pgvecto_rs/config.py +8 -14
- vectordb_bench/backend/clients/pgvecto_rs/pgvecto_rs.py +34 -43
- vectordb_bench/backend/clients/pgvector/cli.py +40 -31
- vectordb_bench/backend/clients/pgvector/config.py +63 -73
- vectordb_bench/backend/clients/pgvector/pgvector.py +98 -104
- vectordb_bench/backend/clients/pgvectorscale/cli.py +38 -24
- vectordb_bench/backend/clients/pgvectorscale/config.py +14 -15
- vectordb_bench/backend/clients/pgvectorscale/pgvectorscale.py +39 -49
- vectordb_bench/backend/clients/pinecone/config.py +1 -0
- vectordb_bench/backend/clients/pinecone/pinecone.py +15 -25
- vectordb_bench/backend/clients/qdrant_cloud/config.py +11 -10
- vectordb_bench/backend/clients/qdrant_cloud/qdrant_cloud.py +41 -35
- vectordb_bench/backend/clients/redis/cli.py +6 -12
- vectordb_bench/backend/clients/redis/config.py +7 -5
- vectordb_bench/backend/clients/redis/redis.py +95 -62
- vectordb_bench/backend/clients/test/cli.py +2 -3
- vectordb_bench/backend/clients/test/config.py +2 -2
- vectordb_bench/backend/clients/test/test.py +5 -9
- vectordb_bench/backend/clients/weaviate_cloud/cli.py +3 -4
- vectordb_bench/backend/clients/weaviate_cloud/config.py +2 -2
- vectordb_bench/backend/clients/weaviate_cloud/weaviate_cloud.py +37 -26
- vectordb_bench/backend/clients/zilliz_cloud/cli.py +14 -11
- vectordb_bench/backend/clients/zilliz_cloud/config.py +2 -4
- vectordb_bench/backend/clients/zilliz_cloud/zilliz_cloud.py +1 -1
- vectordb_bench/backend/data_source.py +18 -14
- vectordb_bench/backend/dataset.py +47 -27
- vectordb_bench/backend/result_collector.py +2 -3
- vectordb_bench/backend/runner/__init__.py +4 -6
- vectordb_bench/backend/runner/mp_runner.py +56 -23
- vectordb_bench/backend/runner/rate_runner.py +30 -19
- vectordb_bench/backend/runner/read_write_runner.py +46 -22
- vectordb_bench/backend/runner/serial_runner.py +81 -46
- vectordb_bench/backend/runner/util.py +4 -3
- vectordb_bench/backend/task_runner.py +92 -92
- vectordb_bench/backend/utils.py +17 -10
- vectordb_bench/base.py +0 -1
- vectordb_bench/cli/cli.py +65 -60
- vectordb_bench/cli/vectordbbench.py +6 -7
- vectordb_bench/frontend/components/check_results/charts.py +8 -19
- vectordb_bench/frontend/components/check_results/data.py +4 -16
- vectordb_bench/frontend/components/check_results/filters.py +8 -16
- vectordb_bench/frontend/components/check_results/nav.py +4 -4
- vectordb_bench/frontend/components/check_results/priceTable.py +1 -3
- vectordb_bench/frontend/components/check_results/stPageConfig.py +2 -1
- vectordb_bench/frontend/components/concurrent/charts.py +12 -12
- vectordb_bench/frontend/components/custom/displayCustomCase.py +17 -11
- vectordb_bench/frontend/components/custom/displaypPrams.py +4 -2
- vectordb_bench/frontend/components/custom/getCustomConfig.py +1 -2
- vectordb_bench/frontend/components/custom/initStyle.py +1 -1
- vectordb_bench/frontend/components/get_results/saveAsImage.py +2 -0
- vectordb_bench/frontend/components/run_test/caseSelector.py +3 -9
- vectordb_bench/frontend/components/run_test/dbConfigSetting.py +1 -4
- vectordb_bench/frontend/components/run_test/dbSelector.py +1 -1
- vectordb_bench/frontend/components/run_test/generateTasks.py +8 -8
- vectordb_bench/frontend/components/run_test/submitTask.py +14 -18
- vectordb_bench/frontend/components/tables/data.py +3 -6
- vectordb_bench/frontend/config/dbCaseConfigs.py +51 -84
- vectordb_bench/frontend/pages/concurrent.py +3 -5
- vectordb_bench/frontend/pages/custom.py +30 -9
- vectordb_bench/frontend/pages/quries_per_dollar.py +3 -3
- vectordb_bench/frontend/pages/run_test.py +3 -7
- vectordb_bench/frontend/utils.py +1 -1
- vectordb_bench/frontend/vdb_benchmark.py +4 -6
- vectordb_bench/interface.py +45 -24
- vectordb_bench/log_util.py +59 -64
- vectordb_bench/metric.py +10 -11
- vectordb_bench/models.py +26 -43
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/METADATA +22 -15
- vectordb_bench-0.0.21.dist-info/RECORD +135 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/WHEEL +1 -1
- vectordb_bench-0.0.19.dist-info/RECORD +0 -135
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/LICENSE +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/entry_points.txt +0 -0
- {vectordb_bench-0.0.19.dist-info → vectordb_bench-0.0.21.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,9 @@
|
|
1
1
|
from abc import abstractmethod
|
2
|
-
from
|
2
|
+
from collections.abc import Mapping, Sequence
|
3
|
+
from typing import Any, LiteralString, TypedDict
|
4
|
+
|
3
5
|
from pydantic import BaseModel, SecretStr
|
4
|
-
|
6
|
+
|
5
7
|
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
6
8
|
|
7
9
|
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
@@ -9,7 +11,7 @@ POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
|
9
11
|
|
10
12
|
class PgDiskANNConfigDict(TypedDict):
|
11
13
|
"""These keys will be directly used as kwargs in psycopg connection string,
|
12
|
-
|
14
|
+
so the names must match exactly psycopg API"""
|
13
15
|
|
14
16
|
user: str
|
15
17
|
password: str
|
@@ -41,44 +43,43 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
|
|
41
43
|
metric_type: MetricType | None = None
|
42
44
|
create_index_before_load: bool = False
|
43
45
|
create_index_after_load: bool = True
|
44
|
-
maintenance_work_mem:
|
45
|
-
max_parallel_workers:
|
46
|
+
maintenance_work_mem: str | None
|
47
|
+
max_parallel_workers: int | None
|
46
48
|
|
47
49
|
def parse_metric(self) -> str:
|
48
50
|
if self.metric_type == MetricType.L2:
|
49
51
|
return "vector_l2_ops"
|
50
|
-
|
52
|
+
if self.metric_type == MetricType.IP:
|
51
53
|
return "vector_ip_ops"
|
52
54
|
return "vector_cosine_ops"
|
53
55
|
|
54
56
|
def parse_metric_fun_op(self) -> LiteralString:
|
55
57
|
if self.metric_type == MetricType.L2:
|
56
58
|
return "<->"
|
57
|
-
|
59
|
+
if self.metric_type == MetricType.IP:
|
58
60
|
return "<#>"
|
59
61
|
return "<=>"
|
60
62
|
|
61
63
|
def parse_metric_fun_str(self) -> str:
|
62
64
|
if self.metric_type == MetricType.L2:
|
63
65
|
return "l2_distance"
|
64
|
-
|
66
|
+
if self.metric_type == MetricType.IP:
|
65
67
|
return "max_inner_product"
|
66
68
|
return "cosine_distance"
|
67
|
-
|
69
|
+
|
68
70
|
@abstractmethod
|
69
|
-
def index_param(self) -> dict:
|
70
|
-
...
|
71
|
+
def index_param(self) -> dict: ...
|
71
72
|
|
72
73
|
@abstractmethod
|
73
|
-
def search_param(self) -> dict:
|
74
|
-
...
|
74
|
+
def search_param(self) -> dict: ...
|
75
75
|
|
76
76
|
@abstractmethod
|
77
|
-
def session_param(self) -> dict:
|
78
|
-
...
|
77
|
+
def session_param(self) -> dict: ...
|
79
78
|
|
80
79
|
@staticmethod
|
81
|
-
def _optionally_build_with_options(
|
80
|
+
def _optionally_build_with_options(
|
81
|
+
with_options: Mapping[str, Any],
|
82
|
+
) -> Sequence[dict[str, Any]]:
|
82
83
|
"""Walk through mappings, creating a List of {key1 = value} pairs. That will be used to build a where clause"""
|
83
84
|
options = []
|
84
85
|
for option_name, value in with_options.items():
|
@@ -87,35 +88,36 @@ class PgDiskANNIndexConfig(BaseModel, DBCaseConfig):
|
|
87
88
|
{
|
88
89
|
"option_name": option_name,
|
89
90
|
"val": str(value),
|
90
|
-
}
|
91
|
+
},
|
91
92
|
)
|
92
93
|
return options
|
93
94
|
|
94
95
|
@staticmethod
|
95
96
|
def _optionally_build_set_options(
|
96
|
-
set_mapping: Mapping[str, Any]
|
97
|
+
set_mapping: Mapping[str, Any],
|
97
98
|
) -> Sequence[dict[str, Any]]:
|
98
99
|
"""Walk through options, creating 'SET 'key1 = "value1";' list"""
|
99
100
|
session_options = []
|
100
101
|
for setting_name, value in set_mapping.items():
|
101
102
|
if value:
|
102
103
|
session_options.append(
|
103
|
-
{
|
104
|
+
{
|
105
|
+
"parameter": {
|
104
106
|
"setting_name": setting_name,
|
105
107
|
"val": str(value),
|
106
108
|
},
|
107
|
-
}
|
109
|
+
},
|
108
110
|
)
|
109
111
|
return session_options
|
110
|
-
|
112
|
+
|
111
113
|
|
112
114
|
class PgDiskANNImplConfig(PgDiskANNIndexConfig):
|
113
115
|
index: IndexType = IndexType.DISKANN
|
114
116
|
max_neighbors: int | None
|
115
117
|
l_value_ib: int | None
|
116
118
|
l_value_is: float | None
|
117
|
-
maintenance_work_mem:
|
118
|
-
max_parallel_workers:
|
119
|
+
maintenance_work_mem: str | None = None
|
120
|
+
max_parallel_workers: int | None = None
|
119
121
|
|
120
122
|
def index_param(self) -> dict:
|
121
123
|
return {
|
@@ -128,18 +130,19 @@ class PgDiskANNImplConfig(PgDiskANNIndexConfig):
|
|
128
130
|
"maintenance_work_mem": self.maintenance_work_mem,
|
129
131
|
"max_parallel_workers": self.max_parallel_workers,
|
130
132
|
}
|
131
|
-
|
133
|
+
|
132
134
|
def search_param(self) -> dict:
|
133
135
|
return {
|
134
136
|
"metric": self.parse_metric(),
|
135
137
|
"metric_fun_op": self.parse_metric_fun_op(),
|
136
138
|
}
|
137
|
-
|
139
|
+
|
138
140
|
def session_param(self) -> dict:
|
139
141
|
return {
|
140
142
|
"diskann.l_value_is": self.l_value_is,
|
141
143
|
}
|
142
|
-
|
144
|
+
|
145
|
+
|
143
146
|
_pgdiskann_case_config = {
|
144
147
|
IndexType.DISKANN: PgDiskANNImplConfig,
|
145
148
|
}
|
@@ -1,9 +1,9 @@
|
|
1
1
|
"""Wrapper around the pg_diskann vector database over VectorDB"""
|
2
2
|
|
3
3
|
import logging
|
4
|
-
import
|
4
|
+
from collections.abc import Generator
|
5
5
|
from contextlib import contextmanager
|
6
|
-
from typing import Any
|
6
|
+
from typing import Any
|
7
7
|
|
8
8
|
import numpy as np
|
9
9
|
import psycopg
|
@@ -44,20 +44,21 @@ class PgDiskANN(VectorDB):
|
|
44
44
|
self._primary_field = "id"
|
45
45
|
self._vector_field = "embedding"
|
46
46
|
|
47
|
-
self.conn, self.cursor = self._create_connection(**self.db_config)
|
47
|
+
self.conn, self.cursor = self._create_connection(**self.db_config)
|
48
48
|
|
49
49
|
log.info(f"{self.name} config values: {self.db_config}\n{self.case_config}")
|
50
50
|
if not any(
|
51
51
|
(
|
52
52
|
self.case_config.create_index_before_load,
|
53
53
|
self.case_config.create_index_after_load,
|
54
|
-
)
|
54
|
+
),
|
55
55
|
):
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
f"{err}\n{pprint.pformat(self.db_config)}\n{pprint.pformat(self.case_config)}"
|
56
|
+
msg = (
|
57
|
+
f"{self.name} config must create an index using create_index_before_load or create_index_after_load"
|
58
|
+
f"{self.name} config values: {self.db_config}\n{self.case_config}"
|
60
59
|
)
|
60
|
+
log.error(msg)
|
61
|
+
raise RuntimeError(msg)
|
61
62
|
|
62
63
|
if drop_old:
|
63
64
|
self._drop_index()
|
@@ -72,7 +73,7 @@ class PgDiskANN(VectorDB):
|
|
72
73
|
self.conn = None
|
73
74
|
|
74
75
|
@staticmethod
|
75
|
-
def _create_connection(**kwargs) ->
|
76
|
+
def _create_connection(**kwargs) -> tuple[Connection, Cursor]:
|
76
77
|
conn = psycopg.connect(**kwargs)
|
77
78
|
conn.cursor().execute("CREATE EXTENSION IF NOT EXISTS pg_diskann CASCADE")
|
78
79
|
conn.commit()
|
@@ -101,25 +102,25 @@ class PgDiskANN(VectorDB):
|
|
101
102
|
log.debug(command.as_string(self.cursor))
|
102
103
|
self.cursor.execute(command)
|
103
104
|
self.conn.commit()
|
104
|
-
|
105
|
+
|
105
106
|
self._filtered_search = sql.Composed(
|
106
107
|
[
|
107
108
|
sql.SQL(
|
108
|
-
"SELECT id FROM public.{table_name} WHERE id >= %s ORDER BY embedding "
|
109
|
-
|
109
|
+
"SELECT id FROM public.{table_name} WHERE id >= %s ORDER BY embedding ",
|
110
|
+
).format(table_name=sql.Identifier(self.table_name)),
|
110
111
|
sql.SQL(self.case_config.search_param()["metric_fun_op"]),
|
111
112
|
sql.SQL(" %s::vector LIMIT %s::int"),
|
112
|
-
]
|
113
|
+
],
|
113
114
|
)
|
114
115
|
|
115
116
|
self._unfiltered_search = sql.Composed(
|
116
117
|
[
|
117
118
|
sql.SQL("SELECT id FROM public.{} ORDER BY embedding ").format(
|
118
|
-
sql.Identifier(self.table_name)
|
119
|
+
sql.Identifier(self.table_name),
|
119
120
|
),
|
120
121
|
sql.SQL(self.case_config.search_param()["metric_fun_op"]),
|
121
122
|
sql.SQL(" %s::vector LIMIT %s::int"),
|
122
|
-
]
|
123
|
+
],
|
123
124
|
)
|
124
125
|
|
125
126
|
try:
|
@@ -137,15 +138,12 @@ class PgDiskANN(VectorDB):
|
|
137
138
|
|
138
139
|
self.cursor.execute(
|
139
140
|
sql.SQL("DROP TABLE IF EXISTS public.{table_name}").format(
|
140
|
-
table_name=sql.Identifier(self.table_name)
|
141
|
-
)
|
141
|
+
table_name=sql.Identifier(self.table_name),
|
142
|
+
),
|
142
143
|
)
|
143
144
|
self.conn.commit()
|
144
145
|
|
145
|
-
def
|
146
|
-
pass
|
147
|
-
|
148
|
-
def optimize(self):
|
146
|
+
def optimize(self, data_size: int | None = None):
|
149
147
|
self._post_insert()
|
150
148
|
|
151
149
|
def _post_insert(self):
|
@@ -160,7 +158,7 @@ class PgDiskANN(VectorDB):
|
|
160
158
|
log.info(f"{self.name} client drop index : {self._index_name}")
|
161
159
|
|
162
160
|
drop_index_sql = sql.SQL("DROP INDEX IF EXISTS {index_name}").format(
|
163
|
-
index_name=sql.Identifier(self._index_name)
|
161
|
+
index_name=sql.Identifier(self._index_name),
|
164
162
|
)
|
165
163
|
log.debug(drop_index_sql.as_string(self.cursor))
|
166
164
|
self.cursor.execute(drop_index_sql)
|
@@ -175,64 +173,53 @@ class PgDiskANN(VectorDB):
|
|
175
173
|
if index_param["maintenance_work_mem"] is not None:
|
176
174
|
self.cursor.execute(
|
177
175
|
sql.SQL("SET maintenance_work_mem TO {};").format(
|
178
|
-
index_param["maintenance_work_mem"]
|
179
|
-
)
|
176
|
+
index_param["maintenance_work_mem"],
|
177
|
+
),
|
180
178
|
)
|
181
179
|
self.cursor.execute(
|
182
180
|
sql.SQL("ALTER USER {} SET maintenance_work_mem TO {};").format(
|
183
181
|
sql.Identifier(self.db_config["user"]),
|
184
182
|
index_param["maintenance_work_mem"],
|
185
|
-
)
|
183
|
+
),
|
186
184
|
)
|
187
185
|
self.conn.commit()
|
188
186
|
|
189
187
|
if index_param["max_parallel_workers"] is not None:
|
190
188
|
self.cursor.execute(
|
191
189
|
sql.SQL("SET max_parallel_maintenance_workers TO '{}';").format(
|
192
|
-
index_param["max_parallel_workers"]
|
193
|
-
)
|
190
|
+
index_param["max_parallel_workers"],
|
191
|
+
),
|
194
192
|
)
|
195
193
|
self.cursor.execute(
|
196
|
-
sql.SQL(
|
197
|
-
"ALTER USER {} SET max_parallel_maintenance_workers TO '{}';"
|
198
|
-
).format(
|
194
|
+
sql.SQL("ALTER USER {} SET max_parallel_maintenance_workers TO '{}';").format(
|
199
195
|
sql.Identifier(self.db_config["user"]),
|
200
196
|
index_param["max_parallel_workers"],
|
201
|
-
)
|
197
|
+
),
|
202
198
|
)
|
203
199
|
self.cursor.execute(
|
204
200
|
sql.SQL("SET max_parallel_workers TO '{}';").format(
|
205
|
-
index_param["max_parallel_workers"]
|
206
|
-
)
|
201
|
+
index_param["max_parallel_workers"],
|
202
|
+
),
|
207
203
|
)
|
208
204
|
self.cursor.execute(
|
209
|
-
sql.SQL(
|
210
|
-
"ALTER USER {} SET max_parallel_workers TO '{}';"
|
211
|
-
).format(
|
205
|
+
sql.SQL("ALTER USER {} SET max_parallel_workers TO '{}';").format(
|
212
206
|
sql.Identifier(self.db_config["user"]),
|
213
207
|
index_param["max_parallel_workers"],
|
214
|
-
)
|
208
|
+
),
|
215
209
|
)
|
216
210
|
self.cursor.execute(
|
217
|
-
sql.SQL(
|
218
|
-
"ALTER TABLE {} SET (parallel_workers = {});"
|
219
|
-
).format(
|
211
|
+
sql.SQL("ALTER TABLE {} SET (parallel_workers = {});").format(
|
220
212
|
sql.Identifier(self.table_name),
|
221
213
|
index_param["max_parallel_workers"],
|
222
|
-
)
|
214
|
+
),
|
223
215
|
)
|
224
216
|
self.conn.commit()
|
225
217
|
|
226
|
-
results = self.cursor.execute(
|
227
|
-
|
228
|
-
).fetchall()
|
229
|
-
results.extend(
|
230
|
-
self.cursor.execute(sql.SQL("SHOW max_parallel_workers;")).fetchall()
|
231
|
-
)
|
232
|
-
results.extend(
|
233
|
-
self.cursor.execute(sql.SQL("SHOW maintenance_work_mem;")).fetchall()
|
234
|
-
)
|
218
|
+
results = self.cursor.execute(sql.SQL("SHOW max_parallel_maintenance_workers;")).fetchall()
|
219
|
+
results.extend(self.cursor.execute(sql.SQL("SHOW max_parallel_workers;")).fetchall())
|
220
|
+
results.extend(self.cursor.execute(sql.SQL("SHOW maintenance_work_mem;")).fetchall())
|
235
221
|
log.info(f"{self.name} parallel index creation parameters: {results}")
|
222
|
+
|
236
223
|
def _create_index(self):
|
237
224
|
assert self.conn is not None, "Connection is not initialized"
|
238
225
|
assert self.cursor is not None, "Cursor is not initialized"
|
@@ -248,28 +235,23 @@ class PgDiskANN(VectorDB):
|
|
248
235
|
sql.SQL("{option_name} = {val}").format(
|
249
236
|
option_name=sql.Identifier(option_name),
|
250
237
|
val=sql.Identifier(str(option_val)),
|
251
|
-
)
|
238
|
+
),
|
252
239
|
)
|
253
|
-
|
254
|
-
if any(options)
|
255
|
-
with_clause = sql.SQL("WITH ({});").format(sql.SQL(", ").join(options))
|
256
|
-
else:
|
257
|
-
with_clause = sql.Composed(())
|
240
|
+
|
241
|
+
with_clause = sql.SQL("WITH ({});").format(sql.SQL(", ").join(options)) if any(options) else sql.Composed(())
|
258
242
|
|
259
243
|
index_create_sql = sql.SQL(
|
260
244
|
"""
|
261
|
-
CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
|
245
|
+
CREATE INDEX IF NOT EXISTS {index_name} ON public.{table_name}
|
262
246
|
USING {index_type} (embedding {embedding_metric})
|
263
|
-
"""
|
247
|
+
""",
|
264
248
|
).format(
|
265
249
|
index_name=sql.Identifier(self._index_name),
|
266
250
|
table_name=sql.Identifier(self.table_name),
|
267
251
|
index_type=sql.Identifier(index_param["index_type"].lower()),
|
268
252
|
embedding_metric=sql.Identifier(index_param["metric"]),
|
269
253
|
)
|
270
|
-
index_create_sql_with_with_clause = (
|
271
|
-
index_create_sql + with_clause
|
272
|
-
).join(" ")
|
254
|
+
index_create_sql_with_with_clause = (index_create_sql + with_clause).join(" ")
|
273
255
|
log.debug(index_create_sql_with_with_clause.as_string(self.cursor))
|
274
256
|
self.cursor.execute(index_create_sql_with_with_clause)
|
275
257
|
self.conn.commit()
|
@@ -283,14 +265,12 @@ class PgDiskANN(VectorDB):
|
|
283
265
|
|
284
266
|
self.cursor.execute(
|
285
267
|
sql.SQL(
|
286
|
-
"CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));"
|
287
|
-
).format(table_name=sql.Identifier(self.table_name), dim=dim)
|
268
|
+
"CREATE TABLE IF NOT EXISTS public.{table_name} (id BIGINT PRIMARY KEY, embedding vector({dim}));",
|
269
|
+
).format(table_name=sql.Identifier(self.table_name), dim=dim),
|
288
270
|
)
|
289
271
|
self.conn.commit()
|
290
272
|
except Exception as e:
|
291
|
-
log.warning(
|
292
|
-
f"Failed to create pgdiskann table: {self.table_name} error: {e}"
|
293
|
-
)
|
273
|
+
log.warning(f"Failed to create pgdiskann table: {self.table_name} error: {e}")
|
294
274
|
raise e from None
|
295
275
|
|
296
276
|
def insert_embeddings(
|
@@ -298,7 +278,7 @@ class PgDiskANN(VectorDB):
|
|
298
278
|
embeddings: list[list[float]],
|
299
279
|
metadata: list[int],
|
300
280
|
**kwargs: Any,
|
301
|
-
) ->
|
281
|
+
) -> tuple[int, Exception | None]:
|
302
282
|
assert self.conn is not None, "Connection is not initialized"
|
303
283
|
assert self.cursor is not None, "Cursor is not initialized"
|
304
284
|
|
@@ -308,8 +288,8 @@ class PgDiskANN(VectorDB):
|
|
308
288
|
|
309
289
|
with self.cursor.copy(
|
310
290
|
sql.SQL("COPY public.{table_name} FROM STDIN (FORMAT BINARY)").format(
|
311
|
-
table_name=sql.Identifier(self.table_name)
|
312
|
-
)
|
291
|
+
table_name=sql.Identifier(self.table_name),
|
292
|
+
),
|
313
293
|
) as copy:
|
314
294
|
copy.set_types(["bigint", "vector"])
|
315
295
|
for i, row in enumerate(metadata_arr):
|
@@ -321,9 +301,7 @@ class PgDiskANN(VectorDB):
|
|
321
301
|
|
322
302
|
return len(metadata), None
|
323
303
|
except Exception as e:
|
324
|
-
log.warning(
|
325
|
-
f"Failed to insert data into table ({self.table_name}), error: {e}"
|
326
|
-
)
|
304
|
+
log.warning(f"Failed to insert data into table ({self.table_name}), error: {e}")
|
327
305
|
return 0, e
|
328
306
|
|
329
307
|
def search_embedding(
|
@@ -340,11 +318,12 @@ class PgDiskANN(VectorDB):
|
|
340
318
|
if filters:
|
341
319
|
gt = filters.get("id")
|
342
320
|
result = self.cursor.execute(
|
343
|
-
|
344
|
-
|
321
|
+
self._filtered_search,
|
322
|
+
(gt, q, k),
|
323
|
+
prepare=True,
|
324
|
+
binary=True,
|
325
|
+
)
|
345
326
|
else:
|
346
|
-
result = self.cursor.execute(
|
347
|
-
self._unfiltered_search, (q, k), prepare=True, binary=True
|
348
|
-
)
|
327
|
+
result = self.cursor.execute(self._unfiltered_search, (q, k), prepare=True, binary=True)
|
349
328
|
|
350
329
|
return [int(i[0]) for i in result.fetchall()]
|
@@ -1,9 +1,11 @@
|
|
1
|
-
|
1
|
+
import os
|
2
|
+
from typing import Annotated, Unpack
|
2
3
|
|
3
4
|
import click
|
4
|
-
import os
|
5
5
|
from pydantic import SecretStr
|
6
6
|
|
7
|
+
from vectordb_bench.backend.clients import DB
|
8
|
+
|
7
9
|
from ....cli.cli import (
|
8
10
|
CommonTypedDict,
|
9
11
|
HNSWFlavor1,
|
@@ -12,12 +14,12 @@ from ....cli.cli import (
|
|
12
14
|
click_parameter_decorators_from_typed_dict,
|
13
15
|
run,
|
14
16
|
)
|
15
|
-
from vectordb_bench.backend.clients import DB
|
16
17
|
|
17
18
|
|
18
19
|
class PgVectoRSTypedDict(CommonTypedDict):
|
19
20
|
user_name: Annotated[
|
20
|
-
str,
|
21
|
+
str,
|
22
|
+
click.option("--user-name", type=str, help="Db username", required=True),
|
21
23
|
]
|
22
24
|
password: Annotated[
|
23
25
|
str,
|
@@ -30,14 +32,10 @@ class PgVectoRSTypedDict(CommonTypedDict):
|
|
30
32
|
),
|
31
33
|
]
|
32
34
|
|
33
|
-
host: Annotated[
|
34
|
-
|
35
|
-
]
|
36
|
-
db_name: Annotated[
|
37
|
-
str, click.option("--db-name", type=str, help="Db name", required=True)
|
38
|
-
]
|
35
|
+
host: Annotated[str, click.option("--host", type=str, help="Db host", required=True)]
|
36
|
+
db_name: Annotated[str, click.option("--db-name", type=str, help="Db name", required=True)]
|
39
37
|
max_parallel_workers: Annotated[
|
40
|
-
|
38
|
+
int | None,
|
41
39
|
click.option(
|
42
40
|
"--max-parallel-workers",
|
43
41
|
type=int,
|
@@ -1,11 +1,11 @@
|
|
1
1
|
from abc import abstractmethod
|
2
2
|
from typing import TypedDict
|
3
3
|
|
4
|
+
from pgvecto_rs.types import Flat, Hnsw, IndexOption, Ivf, Quantization
|
5
|
+
from pgvecto_rs.types.index import QuantizationRatio, QuantizationType
|
4
6
|
from pydantic import BaseModel, SecretStr
|
5
|
-
from pgvecto_rs.types import IndexOption, Ivf, Hnsw, Flat, Quantization
|
6
|
-
from pgvecto_rs.types.index import QuantizationType, QuantizationRatio
|
7
7
|
|
8
|
-
from ..api import
|
8
|
+
from ..api import DBCaseConfig, DBConfig, IndexType, MetricType
|
9
9
|
|
10
10
|
POSTGRE_URL_PLACEHOLDER = "postgresql://%s:%s@%s/%s"
|
11
11
|
|
@@ -52,14 +52,14 @@ class PgVectoRSIndexConfig(BaseModel, DBCaseConfig):
|
|
52
52
|
def parse_metric(self) -> str:
|
53
53
|
if self.metric_type == MetricType.L2:
|
54
54
|
return "vector_l2_ops"
|
55
|
-
|
55
|
+
if self.metric_type == MetricType.IP:
|
56
56
|
return "vector_dot_ops"
|
57
57
|
return "vector_cos_ops"
|
58
58
|
|
59
59
|
def parse_metric_fun_op(self) -> str:
|
60
60
|
if self.metric_type == MetricType.L2:
|
61
61
|
return "<->"
|
62
|
-
|
62
|
+
if self.metric_type == MetricType.IP:
|
63
63
|
return "<#>"
|
64
64
|
return "<=>"
|
65
65
|
|
@@ -85,9 +85,7 @@ class PgVectoRSHNSWConfig(PgVectoRSIndexConfig):
|
|
85
85
|
if self.quantization_type is None:
|
86
86
|
quantization = None
|
87
87
|
else:
|
88
|
-
quantization = Quantization(
|
89
|
-
typ=self.quantization_type, ratio=self.quantization_ratio
|
90
|
-
)
|
88
|
+
quantization = Quantization(typ=self.quantization_type, ratio=self.quantization_ratio)
|
91
89
|
|
92
90
|
option = IndexOption(
|
93
91
|
index=Hnsw(
|
@@ -115,9 +113,7 @@ class PgVectoRSIVFFlatConfig(PgVectoRSIndexConfig):
|
|
115
113
|
if self.quantization_type is None:
|
116
114
|
quantization = None
|
117
115
|
else:
|
118
|
-
quantization = Quantization(
|
119
|
-
typ=self.quantization_type, ratio=self.quantization_ratio
|
120
|
-
)
|
116
|
+
quantization = Quantization(typ=self.quantization_type, ratio=self.quantization_ratio)
|
121
117
|
|
122
118
|
option = IndexOption(
|
123
119
|
index=Ivf(nlist=self.lists, quantization=quantization),
|
@@ -139,9 +135,7 @@ class PgVectoRSFLATConfig(PgVectoRSIndexConfig):
|
|
139
135
|
if self.quantization_type is None:
|
140
136
|
quantization = None
|
141
137
|
else:
|
142
|
-
quantization = Quantization(
|
143
|
-
typ=self.quantization_type, ratio=self.quantization_ratio
|
144
|
-
)
|
138
|
+
quantization = Quantization(typ=self.quantization_type, ratio=self.quantization_ratio)
|
145
139
|
|
146
140
|
option = IndexOption(
|
147
141
|
index=Flat(
|