sqlalchemy-cratedb 0.36.0__py2.py3-none-any.whl → 0.37.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,6 +27,7 @@ from .support import insert_bulk
27
27
  from .type.array import ObjectArray
28
28
  from .type.geo import Geopoint, Geoshape
29
29
  from .type.object import ObjectType
30
+ from .type.vector import FloatVector, knn_match
30
31
 
31
32
  if SA_VERSION < SA_1_4:
32
33
  import textwrap
@@ -51,9 +52,11 @@ if SA_VERSION < SA_1_4:
51
52
 
52
53
  __all__ = [
53
54
  dialect,
55
+ FloatVector,
54
56
  Geopoint,
55
57
  Geoshape,
56
58
  ObjectArray,
57
59
  ObjectType,
58
60
  match,
61
+ knn_match,
59
62
  ]
@@ -238,6 +238,12 @@ class CrateTypeCompiler(compiler.GenericTypeCompiler):
238
238
  def visit_OBJECT(self, type_, **kw):
239
239
  return "OBJECT"
240
240
 
241
+ def visit_FLOAT_VECTOR(self, type_, **kw):
242
+ dimensions = type_.dimensions
243
+ if dimensions is None:
244
+ raise ValueError("FloatVector must be initialized with dimension size")
245
+ return f"FLOAT_VECTOR({dimensions})"
246
+
241
247
 
242
248
  class CrateCompiler(compiler.SQLCompiler):
243
249
 
@@ -33,7 +33,7 @@ from .compiler import (
33
33
  )
34
34
  from crate.client.exceptions import TimezoneUnawareException
35
35
  from .sa_version import SA_VERSION, SA_1_4, SA_2_0
36
- from .type import ObjectArray, ObjectType
36
+ from .type import FloatVector, ObjectArray, ObjectType
37
37
 
38
38
  TYPES_MAP = {
39
39
  "boolean": sqltypes.Boolean,
@@ -51,7 +51,8 @@ TYPES_MAP = {
51
51
  "float": sqltypes.Float,
52
52
  "real": sqltypes.Float,
53
53
  "string": sqltypes.String,
54
- "text": sqltypes.String
54
+ "text": sqltypes.String,
55
+ "float_vector": FloatVector,
55
56
  }
56
57
  try:
57
58
  # SQLAlchemy >= 1.1
@@ -228,6 +229,15 @@ class CrateDialect(default.DefaultDialect):
228
229
  def _get_default_schema_name(self, connection):
229
230
  return 'doc'
230
231
 
232
+ def _get_effective_schema_name(self, connection):
233
+ schema_name_raw = connection.engine.url.query.get("schema")
234
+ schema_name = None
235
+ if isinstance(schema_name_raw, str):
236
+ schema_name = schema_name_raw
237
+ elif isinstance(schema_name_raw, tuple):
238
+ schema_name = schema_name_raw[0]
239
+ return schema_name
240
+
231
241
  def _get_server_version_info(self, connection):
232
242
  return tuple(connection.connection.lowest_server_version.version)
233
243
 
@@ -257,6 +267,8 @@ class CrateDialect(default.DefaultDialect):
257
267
 
258
268
  @reflection.cache
259
269
  def get_table_names(self, connection, schema=None, **kw):
270
+ if schema is None:
271
+ schema = self._get_effective_schema_name(connection)
260
272
  cursor = connection.exec_driver_sql(
261
273
  "SELECT table_name FROM information_schema.tables "
262
274
  "WHERE {0} = ? "
@@ -1,3 +1,4 @@
1
1
  from .array import ObjectArray
2
2
  from .geo import Geopoint, Geoshape
3
3
  from .object import ObjectType
4
+ from .vector import FloatVector, knn_match
@@ -0,0 +1,173 @@
1
+ """
2
+ ## About
3
+ SQLAlchemy data type implementation for CrateDB's `FLOAT_VECTOR` type.
4
+
5
+ ## References
6
+ - https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#float-vector
7
+ - https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#scalar-knn-match
8
+
9
+ ## Details
10
+ The implementation is based on SQLAlchemy's `TypeDecorator`, and also
11
+ offers compiler support.
12
+
13
+ ## Notes
14
+ CrateDB currently only supports the similarity function `VectorSimilarityFunction.EUCLIDEAN`.
15
+ -- https://github.com/crate/crate/blob/5.5.1/server/src/main/java/io/crate/types/FloatVectorType.java#L55
16
+
17
+ pgvector uses a comparator to apply different similarity functions as operators,
18
+ see `pgvector.sqlalchemy.Vector.comparator_factory`.
19
+
20
+ <->: l2/euclidean_distance
21
+ <#>: max_inner_product
22
+ <=>: cosine_distance
23
+
24
+ ## Backlog
25
+ - After dropping support for SQLAlchemy 1.3, use
26
+ `class FloatVector(sa.TypeDecorator[t.Sequence[float]]):`
27
+
28
+ ## Origin
29
+ This module is based on the corresponding pgvector implementation
30
+ by Andrew Kane. Thank you.
31
+
32
+ The MIT License (MIT)
33
+ Copyright (c) 2021-2023 Andrew Kane
34
+ https://github.com/pgvector/pgvector-python
35
+ """
36
+ import typing as t
37
+
38
+ if t.TYPE_CHECKING:
39
+ import numpy.typing as npt # pragma: no cover
40
+
41
+ import sqlalchemy as sa
42
+ from sqlalchemy.sql.expression import ColumnElement, literal
43
+ from sqlalchemy.ext.compiler import compiles
44
+
45
+
46
+ __all__ = [
47
+ "from_db",
48
+ "knn_match",
49
+ "to_db",
50
+ "FloatVector",
51
+ ]
52
+
53
+
54
+ def from_db(value: t.Iterable) -> t.Optional["npt.ArrayLike"]:
55
+ import numpy as np
56
+
57
+ # from `pgvector.utils`
58
+ # could be ndarray if already cast by lower-level driver
59
+ if value is None or isinstance(value, np.ndarray):
60
+ return value
61
+
62
+ return np.array(value, dtype=np.float32)
63
+
64
+
65
+ def to_db(value: t.Any, dim: t.Optional[int] = None) -> t.Optional[t.List]:
66
+ import numpy as np
67
+
68
+ # from `pgvector.utils`
69
+ if value is None:
70
+ return value
71
+
72
+ if isinstance(value, np.ndarray):
73
+ if value.ndim != 1:
74
+ raise ValueError("expected ndim to be 1")
75
+
76
+ if not np.issubdtype(value.dtype, np.integer) and not np.issubdtype(value.dtype, np.floating):
77
+ raise ValueError("dtype must be numeric")
78
+
79
+ value = value.tolist()
80
+
81
+ if dim is not None and len(value) != dim:
82
+ raise ValueError("expected %d dimensions, not %d" % (dim, len(value)))
83
+
84
+ return value
85
+
86
+
87
+ class FloatVector(sa.TypeDecorator):
88
+ """
89
+ SQLAlchemy `FloatVector` data type for CrateDB.
90
+ """
91
+
92
+ cache_ok = False
93
+
94
+ __visit_name__ = "FLOAT_VECTOR"
95
+
96
+ _is_array = True
97
+
98
+ zero_indexes = False
99
+
100
+ impl = sa.ARRAY
101
+
102
+ def __init__(self, dimensions: int = None):
103
+ super().__init__(sa.FLOAT, dimensions=dimensions)
104
+
105
+ def as_generic(self, allow_nulltype=False):
106
+ return sa.ARRAY(item_type=sa.FLOAT)
107
+
108
+ @property
109
+ def python_type(self):
110
+ return list
111
+
112
+ def bind_processor(self, dialect: sa.engine.Dialect) -> t.Callable:
113
+ def process(value: t.Iterable) -> t.Optional[t.List]:
114
+ return to_db(value, self.dimensions)
115
+
116
+ return process
117
+
118
+ def result_processor(self, dialect: sa.engine.Dialect, coltype: t.Any) -> t.Callable:
119
+ def process(value: t.Any) -> t.Optional["npt.ArrayLike"]:
120
+ return from_db(value)
121
+
122
+ return process
123
+
124
+
125
+ class KnnMatch(ColumnElement):
126
+ """
127
+ Wrap CrateDB's `KNN_MATCH` function into an SQLAlchemy function.
128
+
129
+ https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#scalar-knn-match
130
+ """
131
+ inherit_cache = True
132
+
133
+ def __init__(self, column, term, k=None):
134
+ super().__init__()
135
+ self.column = column
136
+ self.term = term
137
+ self.k = k
138
+
139
+ def compile_column(self, compiler):
140
+ return compiler.process(self.column)
141
+
142
+ def compile_term(self, compiler):
143
+ return compiler.process(literal(self.term))
144
+
145
+ def compile_k(self, compiler):
146
+ return compiler.process(literal(self.k))
147
+
148
+
149
+ def knn_match(column, term, k):
150
+ """
151
+ Generate a match predicate for vector search.
152
+
153
+ :param column: A reference to a column or an index.
154
+
155
+ :param term: The term to match against. This is an array of floating point
156
+ values, which is compared to other vectors using an HNSW index search.
157
+
158
+ :param k: The `k` argument determines the number of nearest neighbours to
159
+ search in the index.
160
+ """
161
+ return KnnMatch(column, term, k)
162
+
163
+
164
+ @compiles(KnnMatch)
165
+ def compile_knn_match(knn_match, compiler, **kwargs):
166
+ """
167
+ Clause compiler for `KNN_MATCH`.
168
+ """
169
+ return "KNN_MATCH(%s, %s, %s)" % (
170
+ knn_match.compile_column(compiler),
171
+ knn_match.compile_term(compiler),
172
+ knn_match.compile_k(compiler),
173
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sqlalchemy-cratedb
3
- Version: 0.36.0
3
+ Version: 0.37.0
4
4
  Summary: SQLAlchemy dialect for CrateDB.
5
5
  Author-email: "Crate.io" <office@crate.io>
6
6
  License: Apache License 2.0
@@ -62,12 +62,14 @@ Requires-Python: >=3.6
62
62
  Description-Content-Type: text/markdown
63
63
  License-File: LICENSE
64
64
  License-File: NOTICE
65
- Requires-Dist: crate
65
+ Requires-Dist: crate ==1.0.0dev0
66
66
  Requires-Dist: geojson <4,>=2.5
67
67
  Requires-Dist: sqlalchemy <2.1,>=1
68
68
  Requires-Dist: verlib2 ==0.2
69
69
  Requires-Dist: backports.zoneinfo <1 ; python_version < "3.9"
70
70
  Requires-Dist: importlib-resources ; python_version < "3.9"
71
+ Provides-Extra: all
72
+ Requires-Dist: sqlalchemy-cratedb[vector] ; extra == 'all'
71
73
  Provides-Extra: develop
72
74
  Requires-Dist: black <25 ; extra == 'develop'
73
75
  Requires-Dist: mypy <1.11 ; extra == 'develop'
@@ -82,12 +84,15 @@ Provides-Extra: release
82
84
  Requires-Dist: build <2 ; extra == 'release'
83
85
  Requires-Dist: twine <6 ; extra == 'release'
84
86
  Provides-Extra: test
87
+ Requires-Dist: cratedb-toolkit[testing] ; extra == 'test'
85
88
  Requires-Dist: dask[dataframe] ; extra == 'test'
86
89
  Requires-Dist: pandas <2.3 ; extra == 'test'
87
90
  Requires-Dist: pueblo >=0.0.7 ; extra == 'test'
88
91
  Requires-Dist: pytest <9 ; extra == 'test'
89
92
  Requires-Dist: pytest-cov <6 ; extra == 'test'
90
93
  Requires-Dist: pytest-mock <4 ; extra == 'test'
94
+ Provides-Extra: vector
95
+ Requires-Dist: numpy ; extra == 'vector'
91
96
 
92
97
  # SQLAlchemy dialect for CrateDB
93
98
 
@@ -1,6 +1,6 @@
1
- sqlalchemy_cratedb/__init__.py,sha256=ahkoO7im4phxQSceEfW2cybgCCWPpBj58sT6eg3aNbU,2209
2
- sqlalchemy_cratedb/compiler.py,sha256=FHIq7BXpOZTB-vHm7DkVXmAQ2_D4zYX1nQg-5q0KZ1s,11393
3
- sqlalchemy_cratedb/dialect.py,sha256=lCZerHVNnh7VuLzq0pFPLRfmsqgplOwXCORoXHrH22Q,13708
1
+ sqlalchemy_cratedb/__init__.py,sha256=HIsl7K75Yi2Z1rey-LXPiuonRVJ-Qj8TVWKmIk5d8g8,2289
2
+ sqlalchemy_cratedb/compiler.py,sha256=SDLeuwmk31KWYrNCXKtBqqatIA3z42GAopRhQLHO3o8,11639
3
+ sqlalchemy_cratedb/dialect.py,sha256=kC0HqTzhPSj9eD2IlqDxJR_1iLL58nEpzKqGY2yETPA,14205
4
4
  sqlalchemy_cratedb/predicate.py,sha256=HT7tHF7PxX71G8m91lqpH732iVQZtO8_NPZUIsllcys,3622
5
5
  sqlalchemy_cratedb/sa_version.py,sha256=NhNA5FUw0Sm5n-fSVo6CTcaioDRovUknFOslX31ir-I,1168
6
6
  sqlalchemy_cratedb/support.py,sha256=gw3I4UUe2pfpfxeyJka-eETTP_2Dz9LkjmgONnO382E,2589
@@ -9,14 +9,15 @@ sqlalchemy_cratedb/compat/api13.py,sha256=koqqq48UmAmrjXOyc5lV7gaVEl8xmi5xHUqTBV
9
9
  sqlalchemy_cratedb/compat/core10.py,sha256=P46_ABaii-LQrzeeu8bkhCkoYLr2sVZB3TMGr2BSqD4,8636
10
10
  sqlalchemy_cratedb/compat/core14.py,sha256=xh_wnNkMUD8aE_VFQPnPeXWsPIFzPg0ZjkhfJBzo3IM,11606
11
11
  sqlalchemy_cratedb/compat/core20.py,sha256=QgNEA4z37ldFI4X9HapbTcga7KvsrdMkoMRjxvCJ4tE,15145
12
- sqlalchemy_cratedb/type/__init__.py,sha256=iTGuaDZ40LiRXgEVd9RfUS5sODB0wqkMQVZMhuO8zyI,98
12
+ sqlalchemy_cratedb/type/__init__.py,sha256=8Bp0RKTFAx-3ETOcG4R4wjhL5rarN7tJLfrue7S6mH8,141
13
13
  sqlalchemy_cratedb/type/array.py,sha256=RrHkFoNmTKtk_6md0OtsR7xofRxdk0PbfO-JFfevBJI,4396
14
14
  sqlalchemy_cratedb/type/geo.py,sha256=9wFGxGMxxN-7_qZTlmWialzaPp_zzFj4U-yVqY_deA0,1377
15
15
  sqlalchemy_cratedb/type/object.py,sha256=-bebiW38vor3grD9qsomzJ_z3zRpNMb2XLLto2fFez4,3016
16
- sqlalchemy_cratedb-0.36.0.dist-info/LICENSE,sha256=s_w3FXmAYQuatqsgvyYLnGyC_13KOqp3W1DUEXO9RpY,10175
17
- sqlalchemy_cratedb-0.36.0.dist-info/METADATA,sha256=V5N1KWycH7hRJlrahQsJ6-ZzsTy_ong3MCCrjVgAgjo,6348
18
- sqlalchemy_cratedb-0.36.0.dist-info/NOTICE,sha256=yU9CWOf_XrVU7fpqGgM9tDjppoMyfHHBmFVMiINZk-k,1167
19
- sqlalchemy_cratedb-0.36.0.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
20
- sqlalchemy_cratedb-0.36.0.dist-info/entry_points.txt,sha256=c14wyCG3OeM64_DUbI_vLVUXR3e3GhDyO_PCjo6UQMU,57
21
- sqlalchemy_cratedb-0.36.0.dist-info/top_level.txt,sha256=UjjXz0burl_-2MApzLzffHG_2RXm6KljZvoGJHISMPo,19
22
- sqlalchemy_cratedb-0.36.0.dist-info/RECORD,,
16
+ sqlalchemy_cratedb/type/vector.py,sha256=5Q2v-RuiNKriSm-EX7tTb1PelqqoBvjDOFDB27Xxk5I,4723
17
+ sqlalchemy_cratedb-0.37.0.dist-info/LICENSE,sha256=s_w3FXmAYQuatqsgvyYLnGyC_13KOqp3W1DUEXO9RpY,10175
18
+ sqlalchemy_cratedb-0.37.0.dist-info/METADATA,sha256=JILkri_5NoeIpVpTpZXxczffrmSH50el_VCp0jvEQWk,6561
19
+ sqlalchemy_cratedb-0.37.0.dist-info/NOTICE,sha256=yU9CWOf_XrVU7fpqGgM9tDjppoMyfHHBmFVMiINZk-k,1167
20
+ sqlalchemy_cratedb-0.37.0.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
21
+ sqlalchemy_cratedb-0.37.0.dist-info/entry_points.txt,sha256=c14wyCG3OeM64_DUbI_vLVUXR3e3GhDyO_PCjo6UQMU,57
22
+ sqlalchemy_cratedb-0.37.0.dist-info/top_level.txt,sha256=UjjXz0burl_-2MApzLzffHG_2RXm6KljZvoGJHISMPo,19
23
+ sqlalchemy_cratedb-0.37.0.dist-info/RECORD,,