sqlalchemy-cratedb 0.36.1__py2.py3-none-any.whl → 0.37.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlalchemy_cratedb/__init__.py +3 -0
- sqlalchemy_cratedb/compiler.py +6 -0
- sqlalchemy_cratedb/dialect.py +14 -2
- sqlalchemy_cratedb/type/__init__.py +1 -0
- sqlalchemy_cratedb/type/vector.py +173 -0
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/METADATA +6 -1
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/RECORD +12 -11
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/LICENSE +0 -0
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/NOTICE +0 -0
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/WHEEL +0 -0
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/entry_points.txt +0 -0
- {sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/top_level.txt +0 -0
sqlalchemy_cratedb/__init__.py
CHANGED
@@ -27,6 +27,7 @@ from .support import insert_bulk
|
|
27
27
|
from .type.array import ObjectArray
|
28
28
|
from .type.geo import Geopoint, Geoshape
|
29
29
|
from .type.object import ObjectType
|
30
|
+
from .type.vector import FloatVector, knn_match
|
30
31
|
|
31
32
|
if SA_VERSION < SA_1_4:
|
32
33
|
import textwrap
|
@@ -51,9 +52,11 @@ if SA_VERSION < SA_1_4:
|
|
51
52
|
|
52
53
|
__all__ = [
|
53
54
|
dialect,
|
55
|
+
FloatVector,
|
54
56
|
Geopoint,
|
55
57
|
Geoshape,
|
56
58
|
ObjectArray,
|
57
59
|
ObjectType,
|
58
60
|
match,
|
61
|
+
knn_match,
|
59
62
|
]
|
sqlalchemy_cratedb/compiler.py
CHANGED
@@ -238,6 +238,12 @@ class CrateTypeCompiler(compiler.GenericTypeCompiler):
|
|
238
238
|
def visit_OBJECT(self, type_, **kw):
|
239
239
|
return "OBJECT"
|
240
240
|
|
241
|
+
def visit_FLOAT_VECTOR(self, type_, **kw):
|
242
|
+
dimensions = type_.dimensions
|
243
|
+
if dimensions is None:
|
244
|
+
raise ValueError("FloatVector must be initialized with dimension size")
|
245
|
+
return f"FLOAT_VECTOR({dimensions})"
|
246
|
+
|
241
247
|
|
242
248
|
class CrateCompiler(compiler.SQLCompiler):
|
243
249
|
|
sqlalchemy_cratedb/dialect.py
CHANGED
@@ -33,7 +33,7 @@ from .compiler import (
|
|
33
33
|
)
|
34
34
|
from crate.client.exceptions import TimezoneUnawareException
|
35
35
|
from .sa_version import SA_VERSION, SA_1_4, SA_2_0
|
36
|
-
from .type import ObjectArray, ObjectType
|
36
|
+
from .type import FloatVector, ObjectArray, ObjectType
|
37
37
|
|
38
38
|
TYPES_MAP = {
|
39
39
|
"boolean": sqltypes.Boolean,
|
@@ -51,7 +51,8 @@ TYPES_MAP = {
|
|
51
51
|
"float": sqltypes.Float,
|
52
52
|
"real": sqltypes.Float,
|
53
53
|
"string": sqltypes.String,
|
54
|
-
"text": sqltypes.String
|
54
|
+
"text": sqltypes.String,
|
55
|
+
"float_vector": FloatVector,
|
55
56
|
}
|
56
57
|
try:
|
57
58
|
# SQLAlchemy >= 1.1
|
@@ -228,6 +229,15 @@ class CrateDialect(default.DefaultDialect):
|
|
228
229
|
def _get_default_schema_name(self, connection):
|
229
230
|
return 'doc'
|
230
231
|
|
232
|
+
def _get_effective_schema_name(self, connection):
|
233
|
+
schema_name_raw = connection.engine.url.query.get("schema")
|
234
|
+
schema_name = None
|
235
|
+
if isinstance(schema_name_raw, str):
|
236
|
+
schema_name = schema_name_raw
|
237
|
+
elif isinstance(schema_name_raw, tuple):
|
238
|
+
schema_name = schema_name_raw[0]
|
239
|
+
return schema_name
|
240
|
+
|
231
241
|
def _get_server_version_info(self, connection):
|
232
242
|
return tuple(connection.connection.lowest_server_version.version)
|
233
243
|
|
@@ -257,6 +267,8 @@ class CrateDialect(default.DefaultDialect):
|
|
257
267
|
|
258
268
|
@reflection.cache
|
259
269
|
def get_table_names(self, connection, schema=None, **kw):
|
270
|
+
if schema is None:
|
271
|
+
schema = self._get_effective_schema_name(connection)
|
260
272
|
cursor = connection.exec_driver_sql(
|
261
273
|
"SELECT table_name FROM information_schema.tables "
|
262
274
|
"WHERE {0} = ? "
|
@@ -0,0 +1,173 @@
|
|
1
|
+
"""
|
2
|
+
## About
|
3
|
+
SQLAlchemy data type implementation for CrateDB's `FLOAT_VECTOR` type.
|
4
|
+
|
5
|
+
## References
|
6
|
+
- https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#float-vector
|
7
|
+
- https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#scalar-knn-match
|
8
|
+
|
9
|
+
## Details
|
10
|
+
The implementation is based on SQLAlchemy's `TypeDecorator`, and also
|
11
|
+
offers compiler support.
|
12
|
+
|
13
|
+
## Notes
|
14
|
+
CrateDB currently only supports the similarity function `VectorSimilarityFunction.EUCLIDEAN`.
|
15
|
+
-- https://github.com/crate/crate/blob/5.5.1/server/src/main/java/io/crate/types/FloatVectorType.java#L55
|
16
|
+
|
17
|
+
pgvector use a comparator to apply different similarity functions as operators,
|
18
|
+
see `pgvector.sqlalchemy.Vector.comparator_factory`.
|
19
|
+
|
20
|
+
<->: l2/euclidean_distance
|
21
|
+
<#>: max_inner_product
|
22
|
+
<=>: cosine_distance
|
23
|
+
|
24
|
+
## Backlog
|
25
|
+
- After dropping support for SQLAlchemy 1.3, use
|
26
|
+
`class FloatVector(sa.TypeDecorator[t.Sequence[float]]):`
|
27
|
+
|
28
|
+
## Origin
|
29
|
+
This module is based on the corresponding pgvector implementation
|
30
|
+
by Andrew Kane. Thank you.
|
31
|
+
|
32
|
+
The MIT License (MIT)
|
33
|
+
Copyright (c) 2021-2023 Andrew Kane
|
34
|
+
https://github.com/pgvector/pgvector-python
|
35
|
+
"""
|
36
|
+
import typing as t
|
37
|
+
|
38
|
+
if t.TYPE_CHECKING:
|
39
|
+
import numpy.typing as npt # pragma: no cover
|
40
|
+
|
41
|
+
import sqlalchemy as sa
|
42
|
+
from sqlalchemy.sql.expression import ColumnElement, literal
|
43
|
+
from sqlalchemy.ext.compiler import compiles
|
44
|
+
|
45
|
+
|
46
|
+
__all__ = [
|
47
|
+
"from_db",
|
48
|
+
"knn_match",
|
49
|
+
"to_db",
|
50
|
+
"FloatVector",
|
51
|
+
]
|
52
|
+
|
53
|
+
|
54
|
+
def from_db(value: t.Iterable) -> t.Optional["npt.ArrayLike"]:
|
55
|
+
import numpy as np
|
56
|
+
|
57
|
+
# from `pgvector.utils`
|
58
|
+
# could be ndarray if already cast by lower-level driver
|
59
|
+
if value is None or isinstance(value, np.ndarray):
|
60
|
+
return value
|
61
|
+
|
62
|
+
return np.array(value, dtype=np.float32)
|
63
|
+
|
64
|
+
|
65
|
+
def to_db(value: t.Any, dim: t.Optional[int] = None) -> t.Optional[t.List]:
|
66
|
+
import numpy as np
|
67
|
+
|
68
|
+
# from `pgvector.utils`
|
69
|
+
if value is None:
|
70
|
+
return value
|
71
|
+
|
72
|
+
if isinstance(value, np.ndarray):
|
73
|
+
if value.ndim != 1:
|
74
|
+
raise ValueError("expected ndim to be 1")
|
75
|
+
|
76
|
+
if not np.issubdtype(value.dtype, np.integer) and not np.issubdtype(value.dtype, np.floating):
|
77
|
+
raise ValueError("dtype must be numeric")
|
78
|
+
|
79
|
+
value = value.tolist()
|
80
|
+
|
81
|
+
if dim is not None and len(value) != dim:
|
82
|
+
raise ValueError("expected %d dimensions, not %d" % (dim, len(value)))
|
83
|
+
|
84
|
+
return value
|
85
|
+
|
86
|
+
|
87
|
+
class FloatVector(sa.TypeDecorator):
|
88
|
+
"""
|
89
|
+
SQLAlchemy `FloatVector` data type for CrateDB.
|
90
|
+
"""
|
91
|
+
|
92
|
+
cache_ok = False
|
93
|
+
|
94
|
+
__visit_name__ = "FLOAT_VECTOR"
|
95
|
+
|
96
|
+
_is_array = True
|
97
|
+
|
98
|
+
zero_indexes = False
|
99
|
+
|
100
|
+
impl = sa.ARRAY
|
101
|
+
|
102
|
+
def __init__(self, dimensions: int = None):
|
103
|
+
super().__init__(sa.FLOAT, dimensions=dimensions)
|
104
|
+
|
105
|
+
def as_generic(self, allow_nulltype=False):
|
106
|
+
return sa.ARRAY(item_type=sa.FLOAT)
|
107
|
+
|
108
|
+
@property
|
109
|
+
def python_type(self):
|
110
|
+
return list
|
111
|
+
|
112
|
+
def bind_processor(self, dialect: sa.engine.Dialect) -> t.Callable:
|
113
|
+
def process(value: t.Iterable) -> t.Optional[t.List]:
|
114
|
+
return to_db(value, self.dimensions)
|
115
|
+
|
116
|
+
return process
|
117
|
+
|
118
|
+
def result_processor(self, dialect: sa.engine.Dialect, coltype: t.Any) -> t.Callable:
|
119
|
+
def process(value: t.Any) -> t.Optional["npt.ArrayLike"]:
|
120
|
+
return from_db(value)
|
121
|
+
|
122
|
+
return process
|
123
|
+
|
124
|
+
|
125
|
+
class KnnMatch(ColumnElement):
|
126
|
+
"""
|
127
|
+
Wrap CrateDB's `KNN_MATCH` function into an SQLAlchemy function.
|
128
|
+
|
129
|
+
https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#scalar-knn-match
|
130
|
+
"""
|
131
|
+
inherit_cache = True
|
132
|
+
|
133
|
+
def __init__(self, column, term, k=None):
|
134
|
+
super().__init__()
|
135
|
+
self.column = column
|
136
|
+
self.term = term
|
137
|
+
self.k = k
|
138
|
+
|
139
|
+
def compile_column(self, compiler):
|
140
|
+
return compiler.process(self.column)
|
141
|
+
|
142
|
+
def compile_term(self, compiler):
|
143
|
+
return compiler.process(literal(self.term))
|
144
|
+
|
145
|
+
def compile_k(self, compiler):
|
146
|
+
return compiler.process(literal(self.k))
|
147
|
+
|
148
|
+
|
149
|
+
def knn_match(column, term, k):
|
150
|
+
"""
|
151
|
+
Generate a match predicate for vector search.
|
152
|
+
|
153
|
+
:param column: A reference to a column or an index.
|
154
|
+
|
155
|
+
:param term: The term to match against. This is an array of floating point
|
156
|
+
values, which is compared to other vectors using a HNSW index search.
|
157
|
+
|
158
|
+
:param k: The `k` argument determines the number of nearest neighbours to
|
159
|
+
search in the index.
|
160
|
+
"""
|
161
|
+
return KnnMatch(column, term, k)
|
162
|
+
|
163
|
+
|
164
|
+
@compiles(KnnMatch)
|
165
|
+
def compile_knn_match(knn_match, compiler, **kwargs):
|
166
|
+
"""
|
167
|
+
Clause compiler for `KNN_MATCH`.
|
168
|
+
"""
|
169
|
+
return "KNN_MATCH(%s, %s, %s)" % (
|
170
|
+
knn_match.compile_column(compiler),
|
171
|
+
knn_match.compile_term(compiler),
|
172
|
+
knn_match.compile_k(compiler),
|
173
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sqlalchemy-cratedb
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.37.0
|
4
4
|
Summary: SQLAlchemy dialect for CrateDB.
|
5
5
|
Author-email: "Crate.io" <office@crate.io>
|
6
6
|
License: Apache License 2.0
|
@@ -68,6 +68,8 @@ Requires-Dist: sqlalchemy <2.1,>=1
|
|
68
68
|
Requires-Dist: verlib2 ==0.2
|
69
69
|
Requires-Dist: backports.zoneinfo <1 ; python_version < "3.9"
|
70
70
|
Requires-Dist: importlib-resources ; python_version < "3.9"
|
71
|
+
Provides-Extra: all
|
72
|
+
Requires-Dist: sqlalchemy-cratedb[vector] ; extra == 'all'
|
71
73
|
Provides-Extra: develop
|
72
74
|
Requires-Dist: black <25 ; extra == 'develop'
|
73
75
|
Requires-Dist: mypy <1.11 ; extra == 'develop'
|
@@ -82,12 +84,15 @@ Provides-Extra: release
|
|
82
84
|
Requires-Dist: build <2 ; extra == 'release'
|
83
85
|
Requires-Dist: twine <6 ; extra == 'release'
|
84
86
|
Provides-Extra: test
|
87
|
+
Requires-Dist: cratedb-toolkit[testing] ; extra == 'test'
|
85
88
|
Requires-Dist: dask[dataframe] ; extra == 'test'
|
86
89
|
Requires-Dist: pandas <2.3 ; extra == 'test'
|
87
90
|
Requires-Dist: pueblo >=0.0.7 ; extra == 'test'
|
88
91
|
Requires-Dist: pytest <9 ; extra == 'test'
|
89
92
|
Requires-Dist: pytest-cov <6 ; extra == 'test'
|
90
93
|
Requires-Dist: pytest-mock <4 ; extra == 'test'
|
94
|
+
Provides-Extra: vector
|
95
|
+
Requires-Dist: numpy ; extra == 'vector'
|
91
96
|
|
92
97
|
# SQLAlchemy dialect for CrateDB
|
93
98
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
sqlalchemy_cratedb/__init__.py,sha256=
|
2
|
-
sqlalchemy_cratedb/compiler.py,sha256=
|
3
|
-
sqlalchemy_cratedb/dialect.py,sha256=
|
1
|
+
sqlalchemy_cratedb/__init__.py,sha256=HIsl7K75Yi2Z1rey-LXPiuonRVJ-Qj8TVWKmIk5d8g8,2289
|
2
|
+
sqlalchemy_cratedb/compiler.py,sha256=SDLeuwmk31KWYrNCXKtBqqatIA3z42GAopRhQLHO3o8,11639
|
3
|
+
sqlalchemy_cratedb/dialect.py,sha256=kC0HqTzhPSj9eD2IlqDxJR_1iLL58nEpzKqGY2yETPA,14205
|
4
4
|
sqlalchemy_cratedb/predicate.py,sha256=HT7tHF7PxX71G8m91lqpH732iVQZtO8_NPZUIsllcys,3622
|
5
5
|
sqlalchemy_cratedb/sa_version.py,sha256=NhNA5FUw0Sm5n-fSVo6CTcaioDRovUknFOslX31ir-I,1168
|
6
6
|
sqlalchemy_cratedb/support.py,sha256=gw3I4UUe2pfpfxeyJka-eETTP_2Dz9LkjmgONnO382E,2589
|
@@ -9,14 +9,15 @@ sqlalchemy_cratedb/compat/api13.py,sha256=koqqq48UmAmrjXOyc5lV7gaVEl8xmi5xHUqTBV
|
|
9
9
|
sqlalchemy_cratedb/compat/core10.py,sha256=P46_ABaii-LQrzeeu8bkhCkoYLr2sVZB3TMGr2BSqD4,8636
|
10
10
|
sqlalchemy_cratedb/compat/core14.py,sha256=xh_wnNkMUD8aE_VFQPnPeXWsPIFzPg0ZjkhfJBzo3IM,11606
|
11
11
|
sqlalchemy_cratedb/compat/core20.py,sha256=QgNEA4z37ldFI4X9HapbTcga7KvsrdMkoMRjxvCJ4tE,15145
|
12
|
-
sqlalchemy_cratedb/type/__init__.py,sha256=
|
12
|
+
sqlalchemy_cratedb/type/__init__.py,sha256=8Bp0RKTFAx-3ETOcG4R4wjhL5rarN7tJLfrue7S6mH8,141
|
13
13
|
sqlalchemy_cratedb/type/array.py,sha256=RrHkFoNmTKtk_6md0OtsR7xofRxdk0PbfO-JFfevBJI,4396
|
14
14
|
sqlalchemy_cratedb/type/geo.py,sha256=9wFGxGMxxN-7_qZTlmWialzaPp_zzFj4U-yVqY_deA0,1377
|
15
15
|
sqlalchemy_cratedb/type/object.py,sha256=-bebiW38vor3grD9qsomzJ_z3zRpNMb2XLLto2fFez4,3016
|
16
|
-
sqlalchemy_cratedb
|
17
|
-
sqlalchemy_cratedb-0.
|
18
|
-
sqlalchemy_cratedb-0.
|
19
|
-
sqlalchemy_cratedb-0.
|
20
|
-
sqlalchemy_cratedb-0.
|
21
|
-
sqlalchemy_cratedb-0.
|
22
|
-
sqlalchemy_cratedb-0.
|
16
|
+
sqlalchemy_cratedb/type/vector.py,sha256=5Q2v-RuiNKriSm-EX7tTb1PelqqoBvjDOFDB27Xxk5I,4723
|
17
|
+
sqlalchemy_cratedb-0.37.0.dist-info/LICENSE,sha256=s_w3FXmAYQuatqsgvyYLnGyC_13KOqp3W1DUEXO9RpY,10175
|
18
|
+
sqlalchemy_cratedb-0.37.0.dist-info/METADATA,sha256=JILkri_5NoeIpVpTpZXxczffrmSH50el_VCp0jvEQWk,6561
|
19
|
+
sqlalchemy_cratedb-0.37.0.dist-info/NOTICE,sha256=yU9CWOf_XrVU7fpqGgM9tDjppoMyfHHBmFVMiINZk-k,1167
|
20
|
+
sqlalchemy_cratedb-0.37.0.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
|
21
|
+
sqlalchemy_cratedb-0.37.0.dist-info/entry_points.txt,sha256=c14wyCG3OeM64_DUbI_vLVUXR3e3GhDyO_PCjo6UQMU,57
|
22
|
+
sqlalchemy_cratedb-0.37.0.dist-info/top_level.txt,sha256=UjjXz0burl_-2MApzLzffHG_2RXm6KljZvoGJHISMPo,19
|
23
|
+
sqlalchemy_cratedb-0.37.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
{sqlalchemy_cratedb-0.36.1.dist-info → sqlalchemy_cratedb-0.37.0.dist-info}/entry_points.txt
RENAMED
File without changes
|
File without changes
|