sqlalchemy-cratedb 0.41.0.dev0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,414 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
+ # license agreements. See the NOTICE file distributed with this work for
5
+ # additional information regarding copyright ownership. Crate licenses
6
+ # this file to you under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
+ # License for the specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # However, if you have executed another commercial license agreement
19
+ # with Crate these terms will supersede the license and you may use the
20
+ # software solely pursuant to the terms of the relevant commercial agreement.
21
+
22
+ import logging
23
+ from datetime import date, datetime
24
+
25
+ from sqlalchemy import types as sqltypes
26
+ from sqlalchemy.engine import default, reflection
27
+ from sqlalchemy.sql import functions
28
+ from sqlalchemy.util import asbool, to_list
29
+
30
+ from .compiler import (
31
+ CrateDDLCompiler,
32
+ CrateIdentifierPreparer,
33
+ CrateTypeCompiler,
34
+ )
35
+ from .sa_version import SA_1_4, SA_2_0, SA_VERSION
36
+ from .type import FloatVector, ObjectArray, ObjectType
37
+
38
# Map CrateDB type names (as reported by `information_schema`) onto
# SQLAlchemy type constructs, for reflection purposes.
TYPES_MAP = {
    "boolean": sqltypes.Boolean,
    "short": sqltypes.SmallInteger,
    "smallint": sqltypes.SmallInteger,
    "timestamp": sqltypes.TIMESTAMP(timezone=False),
    "timestamp with time zone": sqltypes.TIMESTAMP(timezone=True),
    "object": ObjectType,
    "integer": sqltypes.Integer,
    "long": sqltypes.NUMERIC,
    "bigint": sqltypes.NUMERIC,
    "double": sqltypes.DECIMAL,
    "double precision": sqltypes.DECIMAL,
    "object_array": ObjectArray,
    "float": sqltypes.Float,
    "real": sqltypes.Float,
    "string": sqltypes.String,
    "text": sqltypes.String,
    "float_vector": FloatVector,
}

# Array counterparts need `sqlalchemy.types.ARRAY` (SQLAlchemy >= 1.1).
# TODO: Dissolve.
try:
    from sqlalchemy.types import ARRAY

    # Inner element type per CrateDB base type name; each entry registers
    # a `<name>_array` key in TYPES_MAP.
    _ARRAY_INNER_TYPES = {
        "integer": sqltypes.Integer,
        "boolean": sqltypes.Boolean,
        "short": sqltypes.SmallInteger,
        "smallint": sqltypes.SmallInteger,
        "timestamp": sqltypes.TIMESTAMP(timezone=False),
        "timestamp with time zone": sqltypes.TIMESTAMP(timezone=True),
        "long": sqltypes.NUMERIC,
        "bigint": sqltypes.NUMERIC,
        "double": sqltypes.DECIMAL,
        "double precision": sqltypes.DECIMAL,
        "float": sqltypes.Float,
        "real": sqltypes.Float,
        "string": sqltypes.String,
        "text": sqltypes.String,
    }
    for _base_name, _inner_type in _ARRAY_INNER_TYPES.items():
        TYPES_MAP[_base_name + "_array"] = ARRAY(_inner_type)
except Exception:  # noqa: S110
    pass


log = logging.getLogger(__name__)
82
+
83
+
84
class Date(sqltypes.Date):
    """
    Date type for CrateDB.

    CrateDB stores dates as `long` timestamps (milliseconds since epoch),
    so values need converting on the way in (to an ISO date string) and on
    the way out (from milliseconds back to a `datetime.date`).
    """

    def bind_processor(self, dialect):
        def process(value):
            if value is not None:
                assert isinstance(value, date)  # noqa: S101
                return value.strftime("%Y-%m-%d")
            return None

        return process

    def result_processor(self, dialect, coltype):
        # Imported here to keep the module-level import surface unchanged.
        from datetime import timezone

        def process(value):
            if not value:
                return None
            try:
                # CrateDB returns the value as milliseconds since epoch.
                # Use a timezone-aware conversion instead of the deprecated
                # `datetime.utcfromtimestamp` (deprecated since Python 3.12);
                # the resulting calendar date is identical.
                return datetime.fromtimestamp(value / 1e3, tz=timezone.utc).date()
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value. "
                "Trying to parse as date string and then as datetime string"
            )
            try:
                return datetime.strptime(value, "%Y-%m-%d").date()
            except ValueError:
                return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ").date()

        return process
119
+
120
+
121
class DateTime(sqltypes.DateTime):
    """
    DateTime type for CrateDB.

    CrateDB stores timestamps as `long` values (milliseconds since epoch);
    bound values are rendered as ISO strings, and results are converted back
    to naive `datetime` objects.
    """

    def bind_processor(self, dialect):
        def process(value):
            if isinstance(value, (datetime, date)):
                return value.strftime("%Y-%m-%dT%H:%M:%S.%f%z")
            return value

        return process

    def result_processor(self, dialect, coltype):
        # Imported here to keep the module-level import surface unchanged.
        from datetime import timezone

        def process(value):
            if not value:
                return None
            try:
                # CrateDB returns the value as milliseconds since epoch.
                # `datetime.utcfromtimestamp` is deprecated since Python 3.12;
                # convert via UTC and strip the tzinfo to preserve the
                # previous naive-datetime return value.
                return datetime.fromtimestamp(value / 1e3, tz=timezone.utc).replace(
                    tzinfo=None
                )
            except TypeError:
                pass

            # Crate doesn't really have datetime or date types but a
            # timestamp type. The "date" mapping (conversion to long)
            # is only applied if the schema definition for the column exists
            # and if the sql insert statement was used.
            # In case of dynamic mapping or using the rest indexing endpoint
            # the date will be returned in the format it was inserted.
            log.warning(
                "Received timestamp isn't a long value. "
                "Trying to parse as datetime string and then as date string"
            )
            try:
                return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
            except ValueError:
                return datetime.strptime(value, "%Y-%m-%d")

        return process
155
+
156
+
157
# Make SQLAlchemy use the CrateDB-aware date/datetime types defined above
# whenever the generic variants are requested.
colspecs = {
    sqltypes.Date: Date,
    sqltypes.DateTime: DateTime,
    sqltypes.TIMESTAMP: DateTime,
}


# Pick the statement compiler matching the installed SQLAlchemy version.
if SA_VERSION >= SA_2_0:
    from .compat.core20 import CrateCompilerSA20 as statement_compiler
elif SA_VERSION >= SA_1_4:
    from .compat.core14 import CrateCompilerSA14 as statement_compiler
else:
    from .compat.core10 import CrateCompilerSA10 as statement_compiler
176
+
177
+
178
class CrateDialect(default.DefaultDialect):
    """
    SQLAlchemy dialect for CrateDB, backed by the `crate-python` DBAPI driver.

    Provides statement/DDL/type compilation hooks, connection handling, and
    schema reflection against CrateDB's `information_schema`.
    """

    name = "crate"
    driver = "crate-python"
    default_paramstyle = "qmark"
    statement_compiler = statement_compiler
    ddl_compiler = CrateDDLCompiler
    type_compiler = CrateTypeCompiler
    preparer = CrateIdentifierPreparer
    use_insertmanyvalues = True
    use_insertmanyvalues_wo_returning = True
    supports_multivalues_insert = True
    supports_native_boolean = True
    supports_statement_cache = True
    colspecs = colspecs
    implicit_returning = True
    insert_returning = True
    update_returning = True

    def __init__(self, **kwargs):
        default.DefaultDialect.__init__(self, **kwargs)

        # CrateDB does not need `OBJECT` types to be serialized as JSON.
        # Corresponding data is forwarded 1:1, and will get marshalled
        # by the low-level driver.
        self._json_deserializer = lambda x: x
        self._json_serializer = lambda x: x

        # Currently, our SQL parser doesn't support unquoted column names that
        # start with _. Adding it here causes sqlalchemy to quote such columns.
        self.identifier_preparer.illegal_initial_characters.add("_")

    def initialize(self, connection):
        """Probe the server once at engine start: version and default schema."""
        # get lowest server version
        self.server_version_info = self._get_server_version_info(connection)
        # get default schema name
        self.default_schema_name = self._get_default_schema_name(connection)

    def do_rollback(self, connection):
        """No-op: CrateDB has no transactions, hence nothing to roll back."""
        # if any exception is raised by the dbapi, sqlalchemy by default
        # attempts to do a rollback crate doesn't support rollbacks.
        # implementing this as noop seems to cause sqlalchemy to propagate the
        # original exception to the user
        pass

    def connect(self, host=None, port=None, *args, **kwargs):
        """
        Create a DBAPI connection.

        A `servers` keyword argument takes precedence over `host`/`port`;
        when only `host` is given, the port defaults to 4200. With `ssl`
        truthy, each server address is prefixed with `https://`.
        """
        server = None
        if host:
            server = "{0}:{1}".format(host, port or "4200")
        if "servers" in kwargs:
            server = kwargs.pop("servers")
        servers = to_list(server)
        if servers:
            use_ssl = asbool(kwargs.pop("ssl", False))
            if use_ssl:
                servers = ["https://" + server for server in servers]
            return self.dbapi.connect(servers=servers, **kwargs)
        return self.dbapi.connect(**kwargs)

    def do_execute(self, cursor, statement, parameters, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.execute(statement, parameters)
        if context is not None:
            context.last_result = result

    def do_execute_no_params(self, cursor, statement, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.execute(statement)
        if context is not None:
            context.last_result = result

    def do_executemany(self, cursor, statement, parameters, context=None):
        """
        Slightly amended to store its response into the request context instance.
        """
        result = cursor.executemany(statement, parameters)
        if context is not None:
            context.last_result = result

    def _get_default_schema_name(self, connection):
        # CrateDB's default schema is always `doc`.
        return "doc"

    def _get_effective_schema_name(self, connection):
        """Return the `schema` query parameter of the engine URL, if any."""
        schema_name_raw = connection.engine.url.query.get("schema")
        schema_name = None
        if isinstance(schema_name_raw, str):
            schema_name = schema_name_raw
        elif isinstance(schema_name_raw, tuple):
            # Repeated query parameters arrive as a tuple; use the first one.
            schema_name = schema_name_raw[0]
        return schema_name

    def _get_server_version_info(self, connection):
        # The driver exposes the lowest version among all cluster nodes.
        return tuple(connection.connection.lowest_server_version.version)

    @classmethod
    def import_dbapi(cls):
        """Return the DBAPI module (SQLAlchemy >= 2.0 hook)."""
        from crate import client

        return client

    @classmethod
    def dbapi(cls):
        """Backward-compatibility hook for SQLAlchemy < 2.0."""
        return cls.import_dbapi()

    def has_schema(self, connection, schema, **kw):
        """Return whether the given schema exists."""
        return schema in self.get_schema_names(connection, **kw)

    def has_table(self, connection, table_name, schema=None, **kw):
        """Return whether the given table exists in the given schema."""
        return table_name in self.get_table_names(connection, schema=schema, **kw)

    @reflection.cache
    def get_schema_names(self, connection, **kw):
        """Return all schema names, sorted ascending."""
        cursor = connection.exec_driver_sql(
            "select schema_name from information_schema.schemata order by schema_name asc"
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_table_names(self, connection, schema=None, **kw):
        """Return the names of all base tables within `schema`."""
        if schema is None:
            schema = self._get_effective_schema_name(connection)
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.tables "
            "WHERE {0} = ? "
            "AND table_type = 'BASE TABLE' "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name,),
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_view_names(self, connection, schema=None, **kw):
        """Return the names of all views."""
        # NOTE(review): The statement contains no `?` placeholder and no
        # schema filter, yet a parameter tuple is passed — this looks like
        # an oversight (views of *all* schemas are returned). Confirm the
        # driver tolerates surplus parameters and whether a
        # `WHERE {schema_column} = ?` clause was intended.
        cursor = connection.exec_driver_sql(
            "SELECT table_name FROM information_schema.views "
            "ORDER BY table_name ASC, {0} ASC".format(self.schema_column),
            (schema or self.default_schema_name,),
        )
        return [row[0] for row in cursor.fetchall()]

    @reflection.cache
    def get_columns(self, connection, table_name, schema=None, **kw):
        """Return column reflection info (name/type/nullable) for a table."""
        query = (
            "SELECT column_name, data_type "
            "FROM information_schema.columns "
            "WHERE table_name = ? AND {0} = ? "
            "AND column_name !~ ?".format(self.schema_column)
        )
        cursor = connection.exec_driver_sql(
            query,
            (
                table_name,
                schema or self.default_schema_name,
                r"(.*)\[\'(.*)\'\]",
            ),  # regex to filter subscript
        )
        return [self._create_column_info(row) for row in cursor.fetchall()]

    @reflection.cache
    def get_pk_constraint(self, engine, table_name, schema=None, **kw):
        """
        Return the primary-key constraint of a table.

        The `information_schema` layout changed across CrateDB releases, so
        both the query and the result extraction are version-dependent.
        """
        if self.server_version_info >= (3, 0, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_schema = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        elif self.server_version_info >= (2, 3, 0):
            query = """SELECT column_name
                    FROM information_schema.key_column_usage
                    WHERE table_name = ? AND table_catalog = ?"""

            def result_fun(result):
                rows = result.fetchall()
                return set(map(lambda el: el[0], rows))

        else:
            query = """SELECT constraint_name
                FROM information_schema.table_constraints
                WHERE table_name = ? AND {schema_col} = ?
                AND constraint_type='PRIMARY_KEY'
                """.format(schema_col=self.schema_column)

            def result_fun(result):
                # Old servers return all PK columns inside a single row.
                rows = result.fetchone()
                return set(rows[0] if rows else [])

        pk_result = engine.exec_driver_sql(query, (table_name, schema or self.default_schema_name))
        pks = result_fun(pk_result)
        return {"constrained_columns": sorted(pks), "name": "PRIMARY KEY"}

    @reflection.cache
    def get_foreign_keys(
        self, connection, table_name, schema=None, postgresql_ignore_search_path=False, **kw
    ):
        # Crate doesn't support Foreign Keys, so this stays empty
        return []

    @reflection.cache
    def get_indexes(self, connection, table_name, schema, **kw):
        """CrateDB does not expose index reflection; always empty."""
        return []

    @property
    def schema_column(self):
        # Name of the schema column in `information_schema` tables.
        return "table_schema"

    def _create_column_info(self, row):
        """Build a SQLAlchemy column-info dict from a (name, data_type) row."""
        return {
            "name": row[0],
            "type": self._resolve_type(row[1]),
            # In Crate every column is nullable except PK
            # Primary Key Constraints are not nullable anyway, no matter what
            # we return here, so it's fine to return always `True`
            "nullable": True,
        }

    def _resolve_type(self, type_):
        # Unknown type names fall back to a generic user-defined type.
        return TYPES_MAP.get(type_, sqltypes.UserDefinedType)

    def has_ilike_operator(self):
        """
        Only CrateDB 4.1.0 and higher implements the `ILIKE` operator.
        """
        server_version_info = self.server_version_info
        return server_version_info is not None and server_version_info >= (4, 1, 0)
407
+
408
+
409
class DateTrunc(functions.GenericFunction):
    """Expose CrateDB's `date_trunc` scalar function, returning a TIMESTAMP."""

    name = "date_trunc"
    type = sqltypes.TIMESTAMP


# Entry point used by SQLAlchemy's dialect discovery.
dialect = CrateDialect
@@ -0,0 +1,96 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
+ # license agreements. See the NOTICE file distributed with this work for
5
+ # additional information regarding copyright ownership. Crate licenses
6
+ # this file to you under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
+ # License for the specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # However, if you have executed another commercial license agreement
19
+ # with Crate these terms will supersede the license and you may use the
20
+ # software solely pursuant to the terms of the relevant commercial agreement.
21
+
22
+ from sqlalchemy.ext.compiler import compiles
23
+ from sqlalchemy.sql.expression import ColumnElement, literal
24
+
25
+
26
class Match(ColumnElement):
    """
    SQL expression element representing CrateDB's fulltext MATCH predicate.

    Rendering is performed by the helpers below, which a compiler hook
    assembles into `match(<column>, <term>) [using <type> [with (...)]]`.
    """

    inherit_cache = True

    def __init__(self, column, term, match_type=None, options=None):
        super(Match, self).__init__()
        self.column = column
        self.term = term
        self.match_type = match_type
        self.options = options

    def compile_column(self, compiler):
        # A dict maps columns to boost values; render them sorted, wrapped
        # in parentheses. A plain column renders as-is.
        if not isinstance(self.column, dict):
            return f"{compiler.process(self.column)}"
        boosted = [
            f"{compiler.process(col)} {boost}" for col, boost in self.column.items()
        ]
        return f"({', '.join(sorted(boosted))})"

    def compile_term(self, compiler):
        # Bind the search term as a literal value.
        return compiler.process(literal(self.term))

    def compile_using(self, compiler):
        # Options are only meaningful together with an explicit match type.
        if not self.match_type:
            if self.options:
                raise ValueError(
                    "missing match_type. "
                    + "It's not allowed to specify options "
                    + "without match_type"
                )
            return None
        using = f"using {self.match_type}"
        clause = self.with_clause()
        if clause:
            return " ".join([using, clause])
        return using

    def with_clause(self):
        # Render options as a sorted `with (k=v, ...)` clause, if any.
        if not self.options:
            return None
        rendered = sorted(f"{key}={value}" for key, value in self.options.items())
        return f"with ({', '.join(rendered)})"
69
+
70
+
71
def match(column, term, match_type=None, options=None):
    """Generate a MATCH predicate for fulltext search.

    :param column: A reference to a column or an index, or a subcolumn, or a
        dictionary of subcolumns with boost values.
    :param term: The term to match against. This string is analyzed and the
        resulting tokens are compared to the index.
    :param match_type (optional): The match type. Determine how the term is
        applied and the score calculated.
    :param options (optional): The match options. Specify match type behaviour.
        (Not possible without a specified match type.) Match options must be
        supplied as a dictionary.
    """
    return Match(column, term, match_type, options)
88
+
89
+
90
@compiles(Match)
def compile_match(match, compiler, **kwargs):
    """Render a `Match` element into its SQL string form."""
    expression = "match({0}, {1})".format(
        match.compile_column(compiler), match.compile_term(compiler)
    )
    using = match.compile_using(compiler)
    if using is not None:
        expression = " ".join([expression, using])
    return expression
@@ -0,0 +1,28 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
+ # license agreements. See the NOTICE file distributed with this work for
5
+ # additional information regarding copyright ownership. Crate licenses
6
+ # this file to you under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
+ # License for the specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # However, if you have executed another commercial license agreement
19
+ # with Crate these terms will supersede the license and you may use the
20
+ # software solely pursuant to the terms of the relevant commercial agreement.
21
+
22
+ import sqlalchemy as sa
23
+ from verlib2 import Version
24
+
25
# Version of the SQLAlchemy library currently installed.
SA_VERSION = Version(sa.__version__)

# Version thresholds used for feature gating throughout the code base.
SA_1_4 = Version("1.4.0b1")
SA_2_0 = Version("2.0.0")
@@ -0,0 +1,18 @@
1
+ from sqlalchemy_cratedb.support.pandas import insert_bulk, table_kwargs
2
+ from sqlalchemy_cratedb.support.polyfill import (
3
+ check_uniqueness_factory,
4
+ patch_autoincrement_timestamp,
5
+ refresh_after_dml,
6
+ )
7
+ from sqlalchemy_cratedb.support.util import quote_relation_name, refresh_dirty, refresh_table
8
+
9
+ __all__ = [
10
+ check_uniqueness_factory,
11
+ insert_bulk,
12
+ patch_autoincrement_timestamp,
13
+ quote_relation_name,
14
+ refresh_after_dml,
15
+ refresh_dirty,
16
+ refresh_table,
17
+ table_kwargs,
18
+ ]
@@ -0,0 +1,110 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
+ # license agreements. See the NOTICE file distributed with this work for
5
+ # additional information regarding copyright ownership. Crate licenses
6
+ # this file to you under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
+ # License for the specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # However, if you have executed another commercial license agreement
19
+ # with Crate these terms will supersede the license and you may use the
20
+ # software solely pursuant to the terms of the relevant commercial agreement.
21
+ import logging
22
+ from contextlib import contextmanager
23
+ from typing import Any
24
+ from unittest.mock import patch
25
+
26
+ import sqlalchemy as sa
27
+
28
+ from sqlalchemy_cratedb.sa_version import SA_2_0, SA_VERSION
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
def insert_bulk(pd_table, conn, keys, data_iter):
    """
    Use CrateDB's "bulk operations" endpoint as a fast path for pandas' and Dask's `to_sql()` [1] method.

    The idea is to break out of SQLAlchemy, compile the insert statement, and use the raw
    DBAPI connection client, in order to invoke a request using `bulk_parameters` [2]::

        cursor.execute(sql=sql, bulk_parameters=data)

    The vanilla implementation, used by SQLAlchemy, is::

        data = [dict(zip(keys, row)) for row in data_iter]
        conn.execute(pd_table.table.insert(), data)

    Batch chunking will happen outside of this function, for example [3] demonstrates
    the relevant code in `pandas.io.sql`.

    [1] https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
    [2] https://cratedb.com/docs/crate/reference/en/latest/interfaces/http.html#bulk-operations
    [3] https://github.com/pandas-dev/pandas/blob/v2.0.1/pandas/io/sql.py#L1011-L1027
    """  # noqa: E501

    # Compile SQL statement and materialize batch.
    sql = str(pd_table.table.insert().compile(bind=conn))
    data = list(data_iter)

    # For debugging and tracing the batches running through this method.
    # Use `isEnabledFor` instead of comparing `logger.level`: `logger.level`
    # is 0 (NOTSET) unless set explicitly on this particular logger, and it
    # ignores the effective level inherited from ancestor loggers.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Bulk SQL: {sql}")
        logger.debug(f"Bulk records: {len(data)}")
        # logger.debug(f"Bulk data: {data}")  # noqa: ERA001

    # Invoke bulk insert operation, ensuring the cursor is closed even when
    # the request raises.
    cursor = conn._dbapi_connection.cursor()
    try:
        cursor.execute(sql=sql, bulk_parameters=data)
    finally:
        cursor.close()
69
+
70
+
71
@contextmanager
def table_kwargs(**kwargs):
    """
    Context manager for adding SQLAlchemy dialect-specific table options at runtime.

    In certain cases where SQLAlchemy orchestration is implemented within a
    framework, like at this spot [1] in pandas' `SQLTable._create_table_setup`,
    it is not easily possible to forward SQLAlchemy dialect options at table
    creation time.

    In order to augment the SQL DDL statement to make it honor database-specific
    dialect options, the only way to work around the unfortunate situation is by
    monkey-patching the call to `sa.Table()` at runtime, relaying additional
    dialect options through corresponding keyword arguments in their original
    `<dialect>_<kwarg>` format [2].

    [1] https://github.com/pandas-dev/pandas/blob/v2.2.2/pandas/io/sql.py#L1282-L1285
    [2] https://docs.sqlalchemy.org/en/20/core/foundation.html#sqlalchemy.sql.base.DialectKWArgs.dialect_kwargs
    """

    if SA_VERSION < SA_2_0:
        # SQLAlchemy 1.x constructs tables via `Table._init`.
        original_init = sa.sql.schema.Table._init

        def patched_init(self, name, metadata, *args, **effective_kwargs):
            # Merge the runtime-supplied dialect options into every call.
            effective_kwargs.update(kwargs)
            return original_init(self, name, metadata, *args, **effective_kwargs)

        with patch("sqlalchemy.sql.schema.Table._init", patched_init):
            yield

    else:
        # SQLAlchemy 2.x constructs tables via `Table._new`.
        original_new = sa.sql.schema.Table._new

        def patched_new(cls, *args: Any, **kw: Any) -> Any:
            kw.update(kwargs)
            return original_new(cls, *args, **kw)

        with patch("sqlalchemy.sql.schema.Table._new", patched_new):
            yield