sqlalchemy-cratedb 0.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlalchemy_cratedb/__init__.py +62 -0
- sqlalchemy_cratedb/compat/__init__.py +0 -0
- sqlalchemy_cratedb/compat/api13.py +156 -0
- sqlalchemy_cratedb/compat/core10.py +264 -0
- sqlalchemy_cratedb/compat/core14.py +359 -0
- sqlalchemy_cratedb/compat/core20.py +447 -0
- sqlalchemy_cratedb/compiler.py +372 -0
- sqlalchemy_cratedb/dialect.py +381 -0
- sqlalchemy_cratedb/predicate.py +99 -0
- sqlalchemy_cratedb/sa_version.py +28 -0
- sqlalchemy_cratedb/support/__init__.py +14 -0
- sqlalchemy_cratedb/support/pandas.py +111 -0
- sqlalchemy_cratedb/support/polyfill.py +125 -0
- sqlalchemy_cratedb/support/util.py +41 -0
- sqlalchemy_cratedb/type/__init__.py +4 -0
- sqlalchemy_cratedb/type/array.py +144 -0
- sqlalchemy_cratedb/type/geo.py +48 -0
- sqlalchemy_cratedb/type/object.py +92 -0
- sqlalchemy_cratedb/type/vector.py +173 -0
- sqlalchemy_cratedb-0.38.0.dist-info/LICENSE +178 -0
- sqlalchemy_cratedb-0.38.0.dist-info/METADATA +143 -0
- sqlalchemy_cratedb-0.38.0.dist-info/NOTICE +24 -0
- sqlalchemy_cratedb-0.38.0.dist-info/RECORD +26 -0
- sqlalchemy_cratedb-0.38.0.dist-info/WHEEL +5 -0
- sqlalchemy_cratedb-0.38.0.dist-info/entry_points.txt +2 -0
- sqlalchemy_cratedb-0.38.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,381 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
import logging
|
23
|
+
from datetime import datetime, date
|
24
|
+
|
25
|
+
from sqlalchemy import types as sqltypes
|
26
|
+
from sqlalchemy.engine import default, reflection
|
27
|
+
from sqlalchemy.sql import functions
|
28
|
+
from sqlalchemy.util import asbool, to_list
|
29
|
+
|
30
|
+
from .compiler import (
|
31
|
+
CrateTypeCompiler,
|
32
|
+
CrateDDLCompiler,
|
33
|
+
CrateIdentifierPreparer,
|
34
|
+
)
|
35
|
+
from .sa_version import SA_VERSION, SA_1_4, SA_2_0
|
36
|
+
from .type import FloatVector, ObjectArray, ObjectType
|
37
|
+
|
38
|
+
# Map of CrateDB type names (as reported by `information_schema.columns.data_type`)
# to SQLAlchemy type objects/classes. Used during reflection to resolve column
# types; see `CrateDialect._resolve_type()`.
TYPES_MAP = {
    "boolean": sqltypes.Boolean,
    "short": sqltypes.SmallInteger,
    "smallint": sqltypes.SmallInteger,
    "timestamp": sqltypes.TIMESTAMP(timezone=False),
    "timestamp with time zone": sqltypes.TIMESTAMP(timezone=True),
    "object": ObjectType,
    "integer": sqltypes.Integer,
    "long": sqltypes.NUMERIC,
    "bigint": sqltypes.NUMERIC,
    "double": sqltypes.DECIMAL,
    "double precision": sqltypes.DECIMAL,
    "object_array": ObjectArray,
    "float": sqltypes.Float,
    "real": sqltypes.Float,
    "string": sqltypes.String,
    "text": sqltypes.String,
    "float_vector": FloatVector,
}
try:
    # SQLAlchemy >= 1.1
    # Register `<type>_array` entries for CrateDB array columns.
    from sqlalchemy.types import ARRAY
    TYPES_MAP["integer_array"] = ARRAY(sqltypes.Integer)
    TYPES_MAP["boolean_array"] = ARRAY(sqltypes.Boolean)
    TYPES_MAP["short_array"] = ARRAY(sqltypes.SmallInteger)
    TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SmallInteger)
    TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=False))
    TYPES_MAP["timestamp with time zone_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=True))
    TYPES_MAP["long_array"] = ARRAY(sqltypes.NUMERIC)
    TYPES_MAP["bigint_array"] = ARRAY(sqltypes.NUMERIC)
    TYPES_MAP["double_array"] = ARRAY(sqltypes.DECIMAL)
    TYPES_MAP["double precision_array"] = ARRAY(sqltypes.DECIMAL)
    TYPES_MAP["float_array"] = ARRAY(sqltypes.Float)
    TYPES_MAP["real_array"] = ARRAY(sqltypes.Float)
    TYPES_MAP["string_array"] = ARRAY(sqltypes.String)
    TYPES_MAP["text_array"] = ARRAY(sqltypes.String)
except Exception:
    # Best-effort: on SQLAlchemy versions without `ARRAY`, array columns
    # simply fall back to `sqltypes.UserDefinedType` during reflection.
    pass
|
76
|
+
|
77
|
+
|
78
|
+
log = logging.getLogger(__name__)
|
79
|
+
|
80
|
+
|
81
|
+
class Date(sqltypes.Date):
    """
    Date type for CrateDB.

    CrateDB stores dates as ``TIMESTAMP`` values (milliseconds since epoch).
    Bind values are rendered as ``YYYY-MM-DD`` strings; result values are
    decoded from epoch milliseconds, falling back to parsing ISO date or
    datetime strings (e.g. for dynamically mapped columns).
    """

    def bind_processor(self, dialect):
        def process(value):
            if value is not None:
                # NOTE: `assert` is stripped under `python -O`; kept as-is
                # for backward compatibility with existing behavior.
                assert isinstance(value, date)
                return value.strftime('%Y-%m-%d')
        return process

    def result_processor(self, dialect, coltype):
        from datetime import timezone

        def process(value):
            # NOTE(review): falsy values (including an epoch timestamp of 0)
            # yield None — preserved from the original implementation.
            if not value:
                return
            try:
                # Fast path: CrateDB returns timestamps as milliseconds since
                # epoch. Use the timezone-aware API instead of the deprecated
                # `datetime.utcfromtimestamp()` (deprecated since Python 3.12);
                # the resulting calendar date is identical.
                return datetime.fromtimestamp(value / 1e3, tz=timezone.utc).date()
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value."
                "Trying to parse as date string and then as datetime string")
            try:
                return datetime.strptime(value, '%Y-%m-%d').date()
            except ValueError:
                return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ').date()
        return process
|
112
|
+
|
113
|
+
|
114
|
+
class DateTime(sqltypes.DateTime):
    """
    Datetime type for CrateDB.

    CrateDB stores datetimes as ``TIMESTAMP`` values (milliseconds since
    epoch). Bind values are rendered as ISO strings; result values are
    decoded from epoch milliseconds into naive UTC datetimes, falling back
    to parsing ISO datetime or date strings.
    """

    def bind_processor(self, dialect):
        def process(value):
            # `date` instances also pass through strftime; time fields
            # render as zeros in that case.
            if isinstance(value, (datetime, date)):
                return value.strftime('%Y-%m-%dT%H:%M:%S.%f%z')
            return value
        return process

    def result_processor(self, dialect, coltype):
        from datetime import timezone

        def process(value):
            # NOTE(review): falsy values (including an epoch timestamp of 0)
            # yield None — preserved from the original implementation.
            if not value:
                return
            try:
                # Fast path: CrateDB returns timestamps as milliseconds since
                # epoch. Use the timezone-aware API instead of the deprecated
                # `datetime.utcfromtimestamp()` (deprecated since Python 3.12),
                # then strip tzinfo to preserve the original naive result.
                return datetime.fromtimestamp(
                    value / 1e3, tz=timezone.utc).replace(tzinfo=None)
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value."
                "Trying to parse as datetime string and then as date string")
            try:
                return datetime.strptime(value, '%Y-%m-%dT%H:%M:%S.%fZ')
            except ValueError:
                return datetime.strptime(value, '%Y-%m-%d')
        return process
|
146
|
+
|
147
|
+
|
148
|
+
# Override SQLAlchemy's generic date/time types with the CrateDB-aware
# variants defined above; installed on the dialect via
# `CrateDialect.colspecs`.
colspecs = {
    sqltypes.Date: Date,
    sqltypes.DateTime: DateTime,
    sqltypes.TIMESTAMP: DateTime,
}
|
153
|
+
|
154
|
+
|
155
|
+
# Select the statement compiler implementation matching the installed
# SQLAlchemy version (2.0.x, 1.4.x, or older).
if SA_VERSION >= SA_2_0:
    from .compat.core20 import CrateCompilerSA20
    statement_compiler = CrateCompilerSA20
elif SA_VERSION >= SA_1_4:
    from .compat.core14 import CrateCompilerSA14
    statement_compiler = CrateCompilerSA14
else:
    from .compat.core10 import CrateCompilerSA10
    statement_compiler = CrateCompilerSA10
|
164
|
+
|
165
|
+
|
166
|
+
class CrateDialect(default.DefaultDialect):
    """
    SQLAlchemy dialect for CrateDB, using the `crate-python` DBAPI driver
    with `qmark` parameter style. Reflection queries run against CrateDB's
    `information_schema` tables.
    """

    name = 'crate'
    driver = 'crate-python'
    default_paramstyle = 'qmark'
    statement_compiler = statement_compiler
    ddl_compiler = CrateDDLCompiler
    type_compiler = CrateTypeCompiler
    preparer = CrateIdentifierPreparer
    use_insertmanyvalues = True
    use_insertmanyvalues_wo_returning = True
    supports_multivalues_insert = True
    supports_native_boolean = True
    supports_statement_cache = True
    colspecs = colspecs
    implicit_returning = True
    insert_returning = True
    update_returning = True

    def __init__(self, **kwargs):
        """Initialize the dialect and apply CrateDB-specific tweaks."""
        default.DefaultDialect.__init__(self, **kwargs)

        # CrateDB does not need `OBJECT` types to be serialized as JSON.
        # Corresponding data is forwarded 1:1, and will get marshalled
        # by the low-level driver.
        self._json_deserializer = lambda x: x
        self._json_serializer = lambda x: x

        # Currently, our SQL parser doesn't support unquoted column names that
        # start with _. Adding it here causes sqlalchemy to quote such columns.
        self.identifier_preparer.illegal_initial_characters.add('_')

    def initialize(self, connection):
        """Populate server version and default schema from the connection."""
        # get lowest server version
        self.server_version_info = \
            self._get_server_version_info(connection)
        # get default schema name
        self.default_schema_name = \
            self._get_default_schema_name(connection)

    def do_rollback(self, connection):
        """No-op: CrateDB does not support transactions/rollback."""
        # if any exception is raised by the dbapi, sqlalchemy by default
        # attempts to do a rollback crate doesn't support rollbacks.
        # implementing this as noop seems to cause sqlalchemy to propagate the
        # original exception to the user
        pass

    def connect(self, host=None, port=None, *args, **kwargs):
        """
        Open a DBAPI connection.

        Builds a `host:port` server string (port defaults to 4200), which an
        explicit `servers` keyword argument overrides. When `ssl` is truthy,
        each server is prefixed with `https://`.
        """
        server = None
        if host:
            server = '{0}:{1}'.format(host, port or '4200')
        if 'servers' in kwargs:
            server = kwargs.pop('servers')
        servers = to_list(server)
        if servers:
            use_ssl = asbool(kwargs.pop("ssl", False))
            if use_ssl:
                servers = ["https://" + server for server in servers]
            return self.dbapi.connect(servers=servers, **kwargs)
        return self.dbapi.connect(**kwargs)

    def _get_default_schema_name(self, connection):
        # CrateDB's hard-coded default schema.
        return 'doc'

    def _get_effective_schema_name(self, connection):
        """Return the `schema` query parameter of the engine URL, if any."""
        schema_name_raw = connection.engine.url.query.get("schema")
        schema_name = None
        if isinstance(schema_name_raw, str):
            schema_name = schema_name_raw
        elif isinstance(schema_name_raw, tuple):
            # URL query values may arrive as tuples; use the first entry.
            schema_name = schema_name_raw[0]
        return schema_name

    def _get_server_version_info(self, connection):
        # The driver exposes the lowest version across all connected nodes.
        return tuple(connection.connection.lowest_server_version.version)

    @classmethod
    def import_dbapi(cls):
        """Return the DBAPI module (SQLAlchemy 2.0-style hook)."""
        from crate import client
        return client

    @classmethod
    def dbapi(cls):
        # NOTE(review): legacy-style accessor kept for SQLAlchemy < 2.0,
        # which calls `dialect.dbapi()` instead of `import_dbapi()`.
        return cls.import_dbapi()

    def has_schema(self, connection, schema, **kw):
        """Return True if the given schema exists."""
        return schema in self.get_schema_names(connection, **kw)

    def has_table(self, connection, table_name, schema=None, **kw):
        """Return True if the given table exists in the (effective) schema."""
        return table_name in self.get_table_names(connection, schema=schema, **kw)

    @reflection.cache
    def get_schema_names(self, connection, **kw):
        """Return all schema names, sorted ascending."""
        cursor = connection.exec_driver_sql(
            "select schema_name "
            "from information_schema.schemata "
            "order by schema_name asc"
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_table_names(self, connection, schema=None, **kw):
        """Return base-table names in the given (or effective/default) schema."""
        if schema is None:
            schema = self._get_effective_schema_name(connection)
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.tables "
            "WHERE {0} = ? "
            "AND table_type = 'BASE TABLE' "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name, )
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_view_names(self, connection, schema=None, **kw):
        """Return view names."""
        # NOTE(review): a parameter tuple is passed but the statement
        # contains no `?` placeholder, so views are NOT filtered by schema.
        # Verify whether a `WHERE {0} = ?` clause was intended.
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.views "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name, )
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_columns(self, connection, table_name, schema=None, **kw):
        """Return column info dicts for the given table."""
        query = "SELECT column_name, data_type " \
                "FROM information_schema.columns " \
                "WHERE table_name = ? AND {0} = ? " \
                "AND column_name !~ ?" \
            .format(self.schema_column)
        cursor = connection.exec_driver_sql(
            query,
            (table_name,
             schema or self.default_schema_name,
             r"(.*)\[\'(.*)\'\]")  # regex to filter subscript
        )
        return [self._create_column_info(row) for row in cursor.fetchall()]

    @reflection.cache
    def get_pk_constraint(self, engine, table_name, schema=None, **kw):
        """
        Return the primary-key constraint for the given table.

        The reflection query differs by server version:
        >= 3.0.0 uses `key_column_usage.table_schema`,
        >= 2.3.0 uses `key_column_usage.table_catalog`,
        older servers fall back to `table_constraints`.
        """
        if self.server_version_info >= (3, 0, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_schema = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        elif self.server_version_info >= (2, 3, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_catalog = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        else:
            query = """SELECT constraint_name
                   FROM information_schema.table_constraints
                   WHERE table_name = ? AND {schema_col} = ?
                   AND constraint_type='PRIMARY_KEY'
                   """.format(schema_col=self.schema_column)

            def result_fun(result):
                # NOTE(review): assumes `constraint_name` is an array of
                # column names on these old servers — confirm before changing.
                rows = result.fetchone()
                return set(rows[0] if rows else [])

        pk_result = engine.exec_driver_sql(
            query,
            (table_name, schema or self.default_schema_name)
        )
        pks = result_fun(pk_result)
        return {'constrained_columns': list(sorted(pks)),
                'name': 'PRIMARY KEY'}

    @reflection.cache
    def get_foreign_keys(self, connection, table_name, schema=None,
                         postgresql_ignore_search_path=False, **kw):
        # Crate doesn't support Foreign Keys, so this stays empty
        return []

    @reflection.cache
    def get_indexes(self, connection, table_name, schema, **kw):
        # Index reflection is not implemented; always empty.
        return []

    @property
    def schema_column(self):
        """Name of the schema column in `information_schema` tables."""
        return "table_schema"

    def _create_column_info(self, row):
        """Convert a `(column_name, data_type)` row into a column info dict."""
        return {
            'name': row[0],
            'type': self._resolve_type(row[1]),
            # In Crate every column is nullable except PK
            # Primary Key Constraints are not nullable anyway, no matter what
            # we return here, so it's fine to return always `True`
            'nullable': True
        }

    def _resolve_type(self, type_):
        """Map a CrateDB type name to a SQLAlchemy type, defaulting to UserDefinedType."""
        return TYPES_MAP.get(type_, sqltypes.UserDefinedType)

    def has_ilike_operator(self):
        """
        Only CrateDB 4.1.0 and higher implements the `ILIKE` operator.
        """
        server_version_info = self.server_version_info
        return server_version_info is not None and server_version_info >= (4, 1, 0)
|
374
|
+
|
375
|
+
|
376
|
+
class DateTrunc(functions.GenericFunction):
    """Register CrateDB's `date_trunc` function, returning a TIMESTAMP."""
    name = "date_trunc"
    type = sqltypes.TIMESTAMP
|
379
|
+
|
380
|
+
|
381
|
+
# Conventional module-level hook through which SQLAlchemy locates the dialect.
dialect = CrateDialect
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
from sqlalchemy.sql.expression import ColumnElement, literal
|
23
|
+
from sqlalchemy.ext.compiler import compiles
|
24
|
+
|
25
|
+
|
26
|
+
class Match(ColumnElement):
    """
    SQL expression element rendering a CrateDB ``MATCH`` predicate
    for fulltext search. Built via the :func:`match` factory and
    compiled by :func:`compile_match`.
    """

    inherit_cache = True

    def __init__(self, column, term, match_type=None, options=None):
        super(Match, self).__init__()
        self.column = column
        self.term = term
        self.match_type = match_type
        self.options = options

    def compile_column(self, compiler):
        """Render the column reference, or a boosted-column list for dicts."""
        if not isinstance(self.column, dict):
            return "{0}".format(compiler.process(self.column))
        boosted = [
            "{0} {1}".format(compiler.process(col), boost)
            for col, boost in self.column.items()
        ]
        return "({0})".format(', '.join(sorted(boosted)))

    def compile_term(self, compiler):
        """Render the search term as a bind parameter."""
        return compiler.process(literal(self.term))

    def compile_using(self, compiler):
        """Render the ``using <type> [with (...)]`` clause, or ``None``."""
        if not self.match_type:
            if self.options:
                raise ValueError("missing match_type. " +
                                 "It's not allowed to specify options " +
                                 "without match_type")
            return None
        parts = ["using {0}".format(self.match_type)]
        with_clause = self.with_clause()
        if with_clause:
            parts.append(with_clause)
        return ' '.join(parts)

    def with_clause(self):
        """Render the ``with (key=value, ...)`` options, or ``None``."""
        if not self.options:
            return None
        rendered = [
            "{0}={1}".format(key, value)
            for key, value in self.options.items()
        ]
        return "with ({0})".format(', '.join(sorted(rendered)))
|
69
|
+
|
70
|
+
|
71
|
+
def match(column, term, match_type=None, options=None):
    """Generate a match predicate for fulltext search.

    :param column: A reference to a column or an index, or a subcolumn, or a
        dictionary of subcolumns with boost values.

    :param term: The term to match against. This string is analyzed and the
        resulting tokens are compared to the index.

    :param match_type: Optional. The match type. Determines how the term is
        applied and the score calculated.

    :param options: Optional. The match options. Specify match type behaviour.
        (Not possible without a specified match type.) Match options must be
        supplied as a dictionary.

    :return: a :class:`Match` expression element; a ``ValueError`` is raised
        at compile time if *options* are given without a *match_type*.
    """
    return Match(column, term, match_type, options)
|
88
|
+
|
89
|
+
|
90
|
+
@compiles(Match)
def compile_match(match, compiler, **kwargs):
    """Compile a :class:`Match` element into its SQL string form."""
    expression = "match(%s, %s)" % (
        match.compile_column(compiler),
        match.compile_term(compiler),
    )
    using = match.compile_using(compiler)
    if not using:
        return expression
    return ' '.join([expression, using])
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
import sqlalchemy as sa
|
23
|
+
from verlib2 import Version
|
24
|
+
|
25
|
+
# Parsed version of the installed SQLAlchemy library.
SA_VERSION = Version(sa.__version__)

# Version thresholds used throughout the dialect to select
# version-specific code paths.
SA_1_4 = Version('1.4.0b1')
SA_2_0 = Version('2.0.0')
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from sqlalchemy_cratedb.support.pandas import insert_bulk, table_kwargs
|
2
|
+
from sqlalchemy_cratedb.support.polyfill import check_uniqueness_factory, refresh_after_dml, \
|
3
|
+
patch_autoincrement_timestamp
|
4
|
+
from sqlalchemy_cratedb.support.util import refresh_table, refresh_dirty
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
check_uniqueness_factory,
|
8
|
+
insert_bulk,
|
9
|
+
patch_autoincrement_timestamp,
|
10
|
+
refresh_after_dml,
|
11
|
+
refresh_dirty,
|
12
|
+
refresh_table,
|
13
|
+
table_kwargs,
|
14
|
+
]
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
from contextlib import contextmanager
|
22
|
+
from typing import Any
|
23
|
+
from unittest.mock import patch
|
24
|
+
|
25
|
+
import logging
|
26
|
+
|
27
|
+
import sqlalchemy as sa
|
28
|
+
|
29
|
+
from sqlalchemy_cratedb import SA_VERSION, SA_2_0
|
30
|
+
|
31
|
+
logger = logging.getLogger(__name__)
|
32
|
+
|
33
|
+
|
34
|
+
def insert_bulk(pd_table, conn, keys, data_iter):
    """
    Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and Dask's `to_sql()` [1] method.

    The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw
    DBAPI connection client, in order to invoke a request using `bulk_parameters` [2]::

        cursor.execute(sql=sql, bulk_parameters=data)

    The vanilla implementation, used by SQLAlchemy, is::

        data = [dict(zip(keys, row)) for row in data_iter]
        conn.execute(pd_table.table.insert(), data)

    Batch chunking will happen outside of this function, for example [3] demonstrates
    the relevant code in `pandas.io.sql`.

    [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
    [2] https://cratedb.com/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations
    [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027
    """

    # Compile SQL statement and materialize batch.
    sql = str(pd_table.table.insert().compile(bind=conn))
    data = list(data_iter)

    # For debugging and tracing the batches running through this method.
    # `isEnabledFor` honors levels inherited from ancestor loggers, unlike
    # comparing `logger.level` directly (which is 0/NOTSET unless a level
    # was set explicitly on this logger).
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Bulk SQL: {sql}")
        logger.debug(f"Bulk records: {len(data)}")
        # logger.debug(f"Bulk data: {data}")

    # Invoke bulk insert operation, closing the cursor even when the
    # request raises (previously the cursor leaked on failure).
    cursor = conn._dbapi_connection.cursor()
    try:
        cursor.execute(sql=sql, bulk_parameters=data)
    finally:
        cursor.close()
|
70
|
+
|
71
|
+
|
72
|
+
@contextmanager
def table_kwargs(**kwargs):
    """
    Context manager for adding SQLAlchemy dialect-specific table options at runtime.

    In certain cases where SQLAlchemy orchestration is implemented within a
    framework, like at this spot [1] in pandas' `SQLTable._create_table_setup`,
    it is not easily possible to forward SQLAlchemy dialect options at table
    creation time.

    In order to augment the SQL DDL statement to make it honor database-specific
    dialect options, the only way to work around the unfortunate situation is by
    monkey-patching the call to `sa.Table()` at runtime, relaying additional
    dialect options through corresponding keyword arguments in their original
    `<dialect>_<kwarg>` format [2].

    [1] https://github.com/pandas-dev/pandas/blob/v2.2.2/pandas/io/sql.py#L1282-L1285
    [2] https://docs.sqlalchemy.org/en/20/core/foundation.html#sqlalchemy.sql.base.DialectKWArgs.dialect_kwargs
    """

    if SA_VERSION < SA_2_0:
        # SQLAlchemy 1.x: constructor logic lives in `Table._init`.
        _init_dist = sa.sql.schema.Table._init

        def _init(self, name, metadata, *args, **kwargs_effective):
            # NOTE(review): `update()` lets the context-manager kwargs
            # override same-named kwargs supplied by the caller — confirm
            # this precedence is intended.
            kwargs_effective.update(kwargs)
            return _init_dist(self, name, metadata, *args, **kwargs_effective)

        with patch("sqlalchemy.sql.schema.Table._init", _init):
            yield

    else:
        # SQLAlchemy 2.x: tables are created through `Table._new`.
        new_dist = sa.sql.schema.Table._new

        def _new(cls, *args: Any, **kw: Any) -> Any:
            # Inject the extra dialect kwargs into every Table construction
            # performed while the context manager is active.
            kw.update(kwargs)
            table = new_dist(cls, *args, **kw)
            return table

        with patch("sqlalchemy.sql.schema.Table._new", _new):
            yield
|