sqlalchemy-cratedb 0.41.0.dev0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- sqlalchemy_cratedb/__init__.py +65 -0
- sqlalchemy_cratedb/compat/__init__.py +0 -0
- sqlalchemy_cratedb/compat/api13.py +152 -0
- sqlalchemy_cratedb/compat/core10.py +253 -0
- sqlalchemy_cratedb/compat/core14.py +337 -0
- sqlalchemy_cratedb/compat/core20.py +423 -0
- sqlalchemy_cratedb/compiler.py +361 -0
- sqlalchemy_cratedb/dialect.py +414 -0
- sqlalchemy_cratedb/predicate.py +96 -0
- sqlalchemy_cratedb/sa_version.py +28 -0
- sqlalchemy_cratedb/support/__init__.py +18 -0
- sqlalchemy_cratedb/support/pandas.py +110 -0
- sqlalchemy_cratedb/support/polyfill.py +130 -0
- sqlalchemy_cratedb/support/util.py +82 -0
- sqlalchemy_cratedb/type/__init__.py +13 -0
- sqlalchemy_cratedb/type/array.py +143 -0
- sqlalchemy_cratedb/type/geo.py +43 -0
- sqlalchemy_cratedb/type/object.py +94 -0
- sqlalchemy_cratedb/type/vector.py +176 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/LICENSE +178 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/METADATA +143 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/NOTICE +24 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/RECORD +26 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/WHEEL +5 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/entry_points.txt +2 -0
- sqlalchemy_cratedb-0.41.0.dev0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,414 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
import logging
|
23
|
+
from datetime import date, datetime
|
24
|
+
|
25
|
+
from sqlalchemy import types as sqltypes
|
26
|
+
from sqlalchemy.engine import default, reflection
|
27
|
+
from sqlalchemy.sql import functions
|
28
|
+
from sqlalchemy.util import asbool, to_list
|
29
|
+
|
30
|
+
from .compiler import (
|
31
|
+
CrateDDLCompiler,
|
32
|
+
CrateIdentifierPreparer,
|
33
|
+
CrateTypeCompiler,
|
34
|
+
)
|
35
|
+
from .sa_version import SA_1_4, SA_2_0, SA_VERSION
|
36
|
+
from .type import FloatVector, ObjectArray, ObjectType
|
37
|
+
|
38
|
+
# Map CrateDB type names, as reported by `information_schema.columns`,
# to SQLAlchemy type objects. Used when reflecting table columns.
TYPES_MAP = {
    "boolean": sqltypes.Boolean,
    "short": sqltypes.SmallInteger,
    "smallint": sqltypes.SmallInteger,
    "timestamp": sqltypes.TIMESTAMP(timezone=False),
    "timestamp with time zone": sqltypes.TIMESTAMP(timezone=True),
    "object": ObjectType,
    "integer": sqltypes.Integer,
    "long": sqltypes.NUMERIC,
    "bigint": sqltypes.NUMERIC,
    "double": sqltypes.DECIMAL,
    "double precision": sqltypes.DECIMAL,
    "object_array": ObjectArray,
    "float": sqltypes.Float,
    "real": sqltypes.Float,
    "string": sqltypes.String,
    "text": sqltypes.String,
    "float_vector": FloatVector,
}

# Needed for SQLAlchemy >= 1.1.
# TODO: Dissolve.
try:
    from sqlalchemy.types import ARRAY

    # Register array flavors of the scalar types above. The keys mirror the
    # `<scalar>_array` naming that CrateDB reports for array columns.
    TYPES_MAP["integer_array"] = ARRAY(sqltypes.Integer)
    TYPES_MAP["boolean_array"] = ARRAY(sqltypes.Boolean)
    TYPES_MAP["short_array"] = ARRAY(sqltypes.SmallInteger)
    TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SmallInteger)
    TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=False))
    TYPES_MAP["timestamp with time zone_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=True))
    TYPES_MAP["long_array"] = ARRAY(sqltypes.NUMERIC)
    TYPES_MAP["bigint_array"] = ARRAY(sqltypes.NUMERIC)
    TYPES_MAP["double_array"] = ARRAY(sqltypes.DECIMAL)
    TYPES_MAP["double precision_array"] = ARRAY(sqltypes.DECIMAL)
    TYPES_MAP["float_array"] = ARRAY(sqltypes.Float)
    TYPES_MAP["real_array"] = ARRAY(sqltypes.Float)
    TYPES_MAP["string_array"] = ARRAY(sqltypes.String)
    TYPES_MAP["text_array"] = ARRAY(sqltypes.String)
except Exception:  # noqa: S110
    # `ARRAY` is unavailable on very old SQLAlchemy versions; array columns
    # then reflect to the generic fallback type in `_resolve_type`.
    pass


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
82
|
+
|
83
|
+
|
84
|
+
class Date(sqltypes.Date):
    """
    CrateDB-specific date type.

    Binds ``date`` values as ISO ``YYYY-MM-DD`` strings, and decodes result
    values that arrive either as epoch milliseconds or as date/datetime
    strings.
    """

    def bind_processor(self, dialect):
        def process(value):
            # Serialize a `date` to the ISO wire format; pass `None` through.
            if value is None:
                return None
            assert isinstance(value, date)  # noqa: S101
            return value.strftime("%Y-%m-%d")

        return process

    def result_processor(self, dialect, coltype):
        def process(value):
            # Empty/zero payloads decode to `None`.
            if not value:
                return None
            # Fast path: CrateDB timestamps are epoch milliseconds.
            try:
                return datetime.utcfromtimestamp(value / 1e3).date()
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value."
                "Trying to parse as date string and then as datetime string"
            )
            try:
                parsed = datetime.strptime(value, "%Y-%m-%d")
            except ValueError:
                parsed = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
            return parsed.date()

        return process
|
119
|
+
|
120
|
+
|
121
|
+
class DateTime(sqltypes.DateTime):
    """
    CrateDB-specific datetime type.

    Binds ``datetime``/``date`` values as ISO strings, and decodes result
    values that arrive either as epoch milliseconds or as ISO strings.
    """

    def bind_processor(self, dialect):
        def process(value):
            # Only temporal objects are formatted; anything else (e.g. an
            # already-formatted string, or `None`) is forwarded unchanged.
            is_temporal = isinstance(value, (datetime, date))
            return value.strftime("%Y-%m-%dT%H:%M:%S.%f%z") if is_temporal else value

        return process

    def result_processor(self, dialect, coltype):
        def process(value):
            # Empty/zero payloads decode to `None`.
            if not value:
                return None
            # Fast path: CrateDB timestamps are epoch milliseconds.
            try:
                return datetime.utcfromtimestamp(value / 1e3)
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value."
                "Trying to parse as datetime string and then as date string"
            )
            try:
                return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
            except ValueError:
                return datetime.strptime(value, "%Y-%m-%d")

        return process
|
155
|
+
|
156
|
+
|
157
|
+
# Replace the generic SQLAlchemy temporal types with the CrateDB-specific
# implementations defined above, for both binding and result processing.
colspecs = {
    sqltypes.Date: Date,
    sqltypes.DateTime: DateTime,
    sqltypes.TIMESTAMP: DateTime,
}


# Select the statement compiler matching the installed SQLAlchemy version.
# Each `compat` module carries the compiler adapted to that API generation.
if SA_VERSION >= SA_2_0:
    from .compat.core20 import CrateCompilerSA20

    statement_compiler = CrateCompilerSA20
elif SA_VERSION >= SA_1_4:
    from .compat.core14 import CrateCompilerSA14

    statement_compiler = CrateCompilerSA14
else:
    from .compat.core10 import CrateCompilerSA10

    statement_compiler = CrateCompilerSA10
|
176
|
+
|
177
|
+
|
178
|
+
class CrateDialect(default.DefaultDialect):
    """
    SQLAlchemy dialect for CrateDB, driven by the `crate-python` DBAPI
    client. Provides connection handling, statement execution hooks, and
    schema reflection via `information_schema`.
    """

    name = "crate"
    driver = "crate-python"
    # CrateDB's HTTP DBAPI uses question-mark positional parameters.
    default_paramstyle = "qmark"
    # Version-dependent compiler selected at module import time above.
    statement_compiler = statement_compiler
    ddl_compiler = CrateDDLCompiler
    type_compiler = CrateTypeCompiler
    preparer = CrateIdentifierPreparer
    use_insertmanyvalues = True
    use_insertmanyvalues_wo_returning = True
    supports_multivalues_insert = True
    supports_native_boolean = True
    supports_statement_cache = True
    colspecs = colspecs
    implicit_returning = True
    insert_returning = True
    update_returning = True

    def __init__(self, **kwargs):
        default.DefaultDialect.__init__(self, **kwargs)

        # CrateDB does not need `OBJECT` types to be serialized as JSON.
        # Corresponding data is forwarded 1:1, and will get marshalled
        # by the low-level driver.
        self._json_deserializer = lambda x: x
        self._json_serializer = lambda x: x

        # Currently, our SQL parser doesn't support unquoted column names that
        # start with _. Adding it here causes sqlalchemy to quote such columns.
        self.identifier_preparer.illegal_initial_characters.add("_")

    def initialize(self, connection):
        """Probe the server once at connect time for version and schema."""
        # get lowest server version
        self.server_version_info = self._get_server_version_info(connection)
        # get default schema name
        self.default_schema_name = self._get_default_schema_name(connection)

    def do_rollback(self, connection):
        # if any exception is raised by the dbapi, sqlalchemy by default
        # attempts to do a rollback crate doesn't support rollbacks.
        # implementing this as noop seems to cause sqlalchemy to propagate the
        # original exception to the user
        pass

    def connect(self, host=None, port=None, *args, **kwargs):
        """
        Open a DBAPI connection.

        Accepts either a single `host`/`port` pair (port defaults to
        CrateDB's HTTP port 4200) or a `servers` keyword listing multiple
        endpoints. With `ssl` truthy, endpoints get an `https://` prefix.
        """
        server = None
        if host:
            server = "{0}:{1}".format(host, port or "4200")
        # An explicit `servers` argument takes precedence over host/port.
        if "servers" in kwargs:
            server = kwargs.pop("servers")
        servers = to_list(server)
        if servers:
            use_ssl = asbool(kwargs.pop("ssl", False))
            if use_ssl:
                servers = ["https://" + server for server in servers]
            return self.dbapi.connect(servers=servers, **kwargs)
        return self.dbapi.connect(**kwargs)

    def do_execute(self, cursor, statement, parameters, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.execute(statement, parameters)
        if context is not None:
            context.last_result = result

    def do_execute_no_params(self, cursor, statement, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.execute(statement)
        if context is not None:
            context.last_result = result

    def do_executemany(self, cursor, statement, parameters, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.executemany(statement, parameters)
        if context is not None:
            context.last_result = result

    def _get_default_schema_name(self, connection):
        # CrateDB's default schema is always "doc".
        return "doc"

    def _get_effective_schema_name(self, connection):
        """Read the schema from the engine URL's `?schema=` query parameter."""
        schema_name_raw = connection.engine.url.query.get("schema")
        schema_name = None
        if isinstance(schema_name_raw, str):
            schema_name = schema_name_raw
        elif isinstance(schema_name_raw, tuple):
            # Repeated query parameters arrive as a tuple; use the first one.
            schema_name = schema_name_raw[0]
        return schema_name

    def _get_server_version_info(self, connection):
        # The driver tracks the lowest version across all configured servers.
        return tuple(connection.connection.lowest_server_version.version)

    @classmethod
    def import_dbapi(cls):
        """Import and return the `crate` DBAPI module (SQLAlchemy 2.x hook)."""
        from crate import client

        return client

    @classmethod
    def dbapi(cls):
        # Legacy hook name used by SQLAlchemy < 2.0; delegates to the new one.
        return cls.import_dbapi()

    def has_schema(self, connection, schema, **kw):
        return schema in self.get_schema_names(connection, **kw)

    def has_table(self, connection, table_name, schema=None, **kw):
        return table_name in self.get_table_names(connection, schema=schema, **kw)

    @reflection.cache
    def get_schema_names(self, connection, **kw):
        """Return all schema names, sorted alphabetically."""
        cursor = connection.exec_driver_sql(
            "select schema_name from information_schema.schemata order by schema_name asc"
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_table_names(self, connection, schema=None, **kw):
        """Return the names of all base tables in the given (or URL-derived) schema."""
        if schema is None:
            schema = self._get_effective_schema_name(connection)
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.tables "
            "WHERE {0} = ? "
            "AND table_type = 'BASE TABLE' "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name,),
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_view_names(self, connection, schema=None, **kw):
        """Return the names of all views."""
        # NOTE(review): the statement contains no "?" placeholder, yet a
        # parameter tuple is passed — presumably ignored by the driver, and
        # views are NOT filtered by schema here; confirm intent upstream.
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.views "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name,),
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_columns(self, connection, table_name, schema=None, **kw):
        """Reflect column names and types, excluding object-subscript columns."""
        query = (
            "SELECT column_name, data_type "
            "FROM information_schema.columns "
            "WHERE table_name = ? AND {0} = ? "
            "AND column_name !~ ?".format(self.schema_column)
        )
        cursor = connection.exec_driver_sql(
            query,
            (
                table_name,
                schema or self.default_schema_name,
                r"(.*)\[\'(.*)\'\]",
            ),  # regex to filter subscript
        )
        return [self._create_column_info(row) for row in cursor.fetchall()]

    @reflection.cache
    def get_pk_constraint(self, engine, table_name, schema=None, **kw):
        """
        Reflect the primary-key columns.

        The `information_schema` location of this data changed across CrateDB
        versions, hence the three version-gated query variants below.
        """
        if self.server_version_info >= (3, 0, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_schema = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        elif self.server_version_info >= (2, 3, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_catalog = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        else:
            # Pre-2.3: PK columns are encoded in the constraint_name value.
            query = """SELECT constraint_name
                    FROM information_schema.table_constraints
                    WHERE table_name = ? AND {schema_col} = ?
                    AND constraint_type='PRIMARY_KEY'
                    """.format(schema_col=self.schema_column)

            def result_fun(result):
                rows = result.fetchone()
                return set(rows[0] if rows else [])

        pk_result = engine.exec_driver_sql(query, (table_name, schema or self.default_schema_name))
        pks = result_fun(pk_result)
        return {"constrained_columns": sorted(pks), "name": "PRIMARY KEY"}

    @reflection.cache
    def get_foreign_keys(
        self, connection, table_name, schema=None, postgresql_ignore_search_path=False, **kw
    ):
        # Crate doesn't support Foreign Keys, so this stays empty
        return []

    @reflection.cache
    def get_indexes(self, connection, table_name, schema, **kw):
        # Index reflection is not implemented for CrateDB.
        return []

    @property
    def schema_column(self):
        # Name of the schema column in `information_schema` tables.
        return "table_schema"

    def _create_column_info(self, row):
        """Build a SQLAlchemy column-info dict from a (name, data_type) row."""
        return {
            "name": row[0],
            "type": self._resolve_type(row[1]),
            # In Crate every column is nullable except PK
            # Primary Key Constraints are not nullable anyway, no matter what
            # we return here, so it's fine to return always `True`
            "nullable": True,
        }

    def _resolve_type(self, type_):
        # Unknown type names fall back to the generic user-defined type.
        return TYPES_MAP.get(type_, sqltypes.UserDefinedType)

    def has_ilike_operator(self):
        """
        Only CrateDB 4.1.0 and higher implements the `ILIKE` operator.
        """
        server_version_info = self.server_version_info
        return server_version_info is not None and server_version_info >= (4, 1, 0)
|
407
|
+
|
408
|
+
|
409
|
+
class DateTrunc(functions.GenericFunction):
    """
    Register CrateDB's `date_trunc` scalar function with SQLAlchemy, so
    `func.date_trunc(...)` expressions carry a TIMESTAMP return type.
    """

    name = "date_trunc"
    type = sqltypes.TIMESTAMP


# Entry point consumed by SQLAlchemy's dialect plugin machinery.
dialect = CrateDialect
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
from sqlalchemy.ext.compiler import compiles
|
23
|
+
from sqlalchemy.sql.expression import ColumnElement, literal
|
24
|
+
|
25
|
+
|
26
|
+
class Match(ColumnElement):
    """
    SQL expression element rendering CrateDB's ``MATCH`` predicate for
    fulltext search, composed of a column part, a search term, and an
    optional ``using ... with (...)`` tail.
    """

    inherit_cache = True

    def __init__(self, column, term, match_type=None, options=None):
        super().__init__()
        self.column = column
        self.term = term
        self.match_type = match_type
        self.options = options

    def compile_column(self, compiler):
        """Render the column part; a dict maps subcolumns to boost values."""
        if not isinstance(self.column, dict):
            return "{0}".format(compiler.process(self.column))
        boosted = [
            "{0} {1}".format(compiler.process(col), boost)
            for col, boost in self.column.items()
        ]
        return "({0})".format(", ".join(sorted(boosted)))

    def compile_term(self, compiler):
        """Render the search term as a bound literal."""
        return compiler.process(literal(self.term))

    def compile_using(self, compiler):
        """Render the ``using <match_type> [with (...)]`` tail, or ``None``."""
        if self.match_type:
            pieces = ["using {0}".format(self.match_type)]
            extra = self.with_clause()
            if extra:
                pieces.append(extra)
            return " ".join(pieces)
        # Options without a match type are invalid.
        if self.options:
            raise ValueError(
                "missing match_type. "
                "It's not allowed to specify options "
                "without match_type"
            )
        return None

    def with_clause(self):
        """Render the ``with (k=v, ...)`` options clause, or ``None``."""
        if not self.options:
            return None
        rendered = sorted("{0}={1}".format(k, v) for k, v in self.options.items())
        return "with ({0})".format(", ".join(rendered))
|
69
|
+
|
70
|
+
|
71
|
+
def match(column, term, match_type=None, options=None):
    """Generates match predicate for fulltext search

    :param column: A reference to a column or an index, or a subcolumn, or a
       dictionary of subcolumns with boost values.

    :param term: The term to match against. This string is analyzed and the
       resulting tokens are compared to the index.

    :param match_type (optional): The match type. Determine how the term is
       applied and the score calculated.

    :param options (optional): The match options. Specify match type behaviour.
       (Not possible without a specified match type.) Match options must be
       supplied as a dictionary.

    :returns: A :class:`Match` expression element, compiled to CrateDB's
       ``MATCH`` predicate by :func:`compile_match`.
    """
    return Match(column, term, match_type, options)
|
88
|
+
|
89
|
+
|
90
|
+
@compiles(Match)
def compile_match(match, compiler, **kwargs):
    """Compile a :class:`Match` element into CrateDB's ``match(...)`` SQL."""
    expression = "match({0}, {1})".format(
        match.compile_column(compiler), match.compile_term(compiler)
    )
    using = match.compile_using(compiler)
    if using:
        expression = " ".join([expression, using])
    return expression
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
|
22
|
+
import sqlalchemy as sa
|
23
|
+
from verlib2 import Version
|
24
|
+
|
25
|
+
# Version of the installed SQLAlchemy distribution, parsed for comparisons.
SA_VERSION = Version(sa.__version__)

# Reference versions used for feature gating throughout the code base.
SA_1_4 = Version("1.4.0b1")
SA_2_0 = Version("2.0.0")
|
@@ -0,0 +1,18 @@
|
|
1
|
+
from sqlalchemy_cratedb.support.pandas import insert_bulk, table_kwargs
|
2
|
+
from sqlalchemy_cratedb.support.polyfill import (
|
3
|
+
check_uniqueness_factory,
|
4
|
+
patch_autoincrement_timestamp,
|
5
|
+
refresh_after_dml,
|
6
|
+
)
|
7
|
+
from sqlalchemy_cratedb.support.util import quote_relation_name, refresh_dirty, refresh_table
|
8
|
+
|
9
|
+
# Public API of the `support` subpackage.
# Entries must be strings: `from sqlalchemy_cratedb.support import *`
# raises `TypeError: Item in __all__ must be str` for non-string items,
# which the previous bare-name list would have triggered.
__all__ = [
    "check_uniqueness_factory",
    "insert_bulk",
    "patch_autoincrement_timestamp",
    "quote_relation_name",
    "refresh_after_dml",
    "refresh_dirty",
    "refresh_table",
    "table_kwargs",
]
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# -*- coding: utf-8; -*-
|
2
|
+
#
|
3
|
+
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
|
4
|
+
# license agreements. See the NOTICE file distributed with this work for
|
5
|
+
# additional information regarding copyright ownership. Crate licenses
|
6
|
+
# this file to you under the Apache License, Version 2.0 (the "License");
|
7
|
+
# you may not use this file except in compliance with the License. You may
|
8
|
+
# obtain a copy of the License at
|
9
|
+
#
|
10
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
#
|
12
|
+
# Unless required by applicable law or agreed to in writing, software
|
13
|
+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
14
|
+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
15
|
+
# License for the specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
#
|
18
|
+
# However, if you have executed another commercial license agreement
|
19
|
+
# with Crate these terms will supersede the license and you may use the
|
20
|
+
# software solely pursuant to the terms of the relevant commercial agreement.
|
21
|
+
import logging
|
22
|
+
from contextlib import contextmanager
|
23
|
+
from typing import Any
|
24
|
+
from unittest.mock import patch
|
25
|
+
|
26
|
+
import sqlalchemy as sa
|
27
|
+
|
28
|
+
from sqlalchemy_cratedb.sa_version import SA_2_0, SA_VERSION
|
29
|
+
|
30
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
31
|
+
|
32
|
+
|
33
|
+
def insert_bulk(pd_table, conn, keys, data_iter):
    """
    Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and Dask's `to_sql()` [1] method.

    The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw
    DBAPI connection client, in order to invoke a request using `bulk_parameters` [2]::

        cursor.execute(sql=sql, bulk_parameters=data)

    The vanilla implementation, used by SQLAlchemy, is::

        data = [dict(zip(keys, row)) for row in data_iter]
        conn.execute(pd_table.table.insert(), data)

    Batch chunking will happen outside of this function, for example [3] demonstrates
    the relevant code in `pandas.io.sql`.

    :param pd_table: pandas `SQLTable` wrapper carrying the target table.
    :param conn: SQLAlchemy connection providing the raw DBAPI connection.
    :param keys: Column names of the batch (unused here; the compiled
        insert statement already covers all columns).
    :param data_iter: Iterable of row tuples to insert.

    [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
    [2] https://cratedb.com/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations
    [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027
    """  # noqa: E501

    # Compile SQL statement and materialize batch.
    sql = str(pd_table.table.insert().compile(bind=conn))
    data = list(data_iter)

    # For debugging and tracing the batches running through this method.
    if logger.level == logging.DEBUG:
        logger.debug(f"Bulk SQL: {sql}")
        logger.debug(f"Bulk records: {len(data)}")
        # logger.debug(f"Bulk data: {data}")  # noqa: ERA001

    # Invoke bulk insert operation. Close the cursor even when the request
    # fails, so a dangling cursor is not left behind on the connection.
    cursor = conn._dbapi_connection.cursor()
    try:
        cursor.execute(sql=sql, bulk_parameters=data)
    finally:
        cursor.close()
|
69
|
+
|
70
|
+
|
71
|
+
@contextmanager
def table_kwargs(**kwargs):
    """
    Context manager for adding SQLAlchemy dialect-specific table options at runtime.

    In certain cases where SQLAlchemy orchestration is implemented within a
    framework, like at this spot [1] in pandas' `SQLTable._create_table_setup`,
    it is not easily possible to forward SQLAlchemy dialect options at table
    creation time.

    In order to augment the SQL DDL statement to make it honor database-specific
    dialect options, the only way to work around the unfortunate situation is by
    monkey-patching the call to `sa.Table()` at runtime, relaying additional
    dialect options through corresponding keyword arguments in their original
    `<dialect>_<kwarg>` format [2].

    [1] https://github.com/pandas-dev/pandas/blob/v2.2.2/pandas/io/sql.py#L1282-L1285
    [2] https://docs.sqlalchemy.org/en/20/core/foundation.html#sqlalchemy.sql.base.DialectKWArgs.dialect_kwargs
    """

    # The internal Table construction hook differs between SQLAlchemy
    # generations: `Table._init` before 2.0, `Table._new` from 2.0 onward.
    if SA_VERSION < SA_2_0:
        # Keep a reference to the original implementation for delegation.
        _init_dist = sa.sql.schema.Table._init

        def _init(self, name, metadata, *args, **kwargs_effective):
            # Merge the context manager's kwargs into each Table() call.
            kwargs_effective.update(kwargs)
            return _init_dist(self, name, metadata, *args, **kwargs_effective)

        with patch("sqlalchemy.sql.schema.Table._init", _init):
            yield

    else:
        # Keep a reference to the original implementation for delegation.
        new_dist = sa.sql.schema.Table._new

        def _new(cls, *args: Any, **kw: Any) -> Any:
            # Merge the context manager's kwargs into each Table() call.
            kw.update(kwargs)
            table = new_dist(cls, *args, **kw)
            return table

        with patch("sqlalchemy.sql.schema.Table._new", _new):
            yield
|