sqlglot 27.29.0__py3-none-any.whl → 28.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlglot/__main__.py +6 -4
- sqlglot/_version.py +2 -2
- sqlglot/dialects/bigquery.py +116 -295
- sqlglot/dialects/clickhouse.py +67 -2
- sqlglot/dialects/databricks.py +38 -1
- sqlglot/dialects/dialect.py +327 -286
- sqlglot/dialects/dremio.py +4 -1
- sqlglot/dialects/duckdb.py +718 -22
- sqlglot/dialects/exasol.py +243 -10
- sqlglot/dialects/hive.py +8 -8
- sqlglot/dialects/mysql.py +11 -2
- sqlglot/dialects/oracle.py +29 -0
- sqlglot/dialects/postgres.py +46 -24
- sqlglot/dialects/presto.py +47 -16
- sqlglot/dialects/redshift.py +16 -0
- sqlglot/dialects/risingwave.py +3 -0
- sqlglot/dialects/singlestore.py +12 -3
- sqlglot/dialects/snowflake.py +199 -271
- sqlglot/dialects/spark.py +2 -2
- sqlglot/dialects/spark2.py +11 -48
- sqlglot/dialects/sqlite.py +9 -0
- sqlglot/dialects/teradata.py +5 -8
- sqlglot/dialects/trino.py +6 -0
- sqlglot/dialects/tsql.py +61 -25
- sqlglot/diff.py +4 -2
- sqlglot/errors.py +69 -0
- sqlglot/expressions.py +484 -84
- sqlglot/generator.py +143 -41
- sqlglot/helper.py +2 -2
- sqlglot/optimizer/annotate_types.py +247 -140
- sqlglot/optimizer/canonicalize.py +6 -1
- sqlglot/optimizer/eliminate_joins.py +1 -1
- sqlglot/optimizer/eliminate_subqueries.py +2 -2
- sqlglot/optimizer/merge_subqueries.py +5 -5
- sqlglot/optimizer/normalize.py +20 -13
- sqlglot/optimizer/normalize_identifiers.py +17 -3
- sqlglot/optimizer/optimizer.py +4 -0
- sqlglot/optimizer/pushdown_predicates.py +1 -1
- sqlglot/optimizer/qualify.py +14 -6
- sqlglot/optimizer/qualify_columns.py +113 -352
- sqlglot/optimizer/qualify_tables.py +112 -70
- sqlglot/optimizer/resolver.py +374 -0
- sqlglot/optimizer/scope.py +27 -16
- sqlglot/optimizer/simplify.py +1074 -964
- sqlglot/optimizer/unnest_subqueries.py +12 -2
- sqlglot/parser.py +276 -160
- sqlglot/planner.py +2 -2
- sqlglot/schema.py +15 -4
- sqlglot/tokens.py +42 -7
- sqlglot/transforms.py +77 -22
- sqlglot/typing/__init__.py +316 -0
- sqlglot/typing/bigquery.py +376 -0
- sqlglot/typing/hive.py +12 -0
- sqlglot/typing/presto.py +24 -0
- sqlglot/typing/snowflake.py +505 -0
- sqlglot/typing/spark2.py +58 -0
- sqlglot/typing/tsql.py +9 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/METADATA +2 -2
- sqlglot-28.4.1.dist-info/RECORD +92 -0
- sqlglot-27.29.0.dist-info/RECORD +0 -84
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/WHEEL +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/licenses/LICENSE +0 -0
- {sqlglot-27.29.0.dist-info → sqlglot-28.4.1.dist-info}/top_level.txt +0 -0
sqlglot/__main__.py
CHANGED
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
import typing as t
|
|
6
6
|
|
|
7
7
|
import sqlglot
|
|
8
|
+
from sqlglot.helper import to_bool
|
|
8
9
|
|
|
9
10
|
parser = argparse.ArgumentParser(description="Transpile SQL")
|
|
10
11
|
parser.add_argument(
|
|
@@ -28,10 +29,11 @@ parser.add_argument(
|
|
|
28
29
|
help="Dialect to write default is generic",
|
|
29
30
|
)
|
|
30
31
|
parser.add_argument(
|
|
31
|
-
"--
|
|
32
|
+
"--identify",
|
|
32
33
|
dest="identify",
|
|
33
|
-
|
|
34
|
-
|
|
34
|
+
type=str,
|
|
35
|
+
default="safe",
|
|
36
|
+
help="Whether to quote identifiers (safe, true, false)",
|
|
35
37
|
)
|
|
36
38
|
parser.add_argument(
|
|
37
39
|
"--no-pretty",
|
|
@@ -87,7 +89,7 @@ else:
|
|
|
87
89
|
sql,
|
|
88
90
|
read=args.read,
|
|
89
91
|
write=args.write,
|
|
90
|
-
identify=args.identify,
|
|
92
|
+
identify="safe" if args.identify == "safe" else to_bool(args.identify),
|
|
91
93
|
pretty=args.pretty,
|
|
92
94
|
error_level=error_level,
|
|
93
95
|
)
|
sqlglot/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '27.29.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (27, 29, 0)
|
|
31
|
+
__version__ = version = '28.4.1'
|
|
32
|
+
__version_tuple__ = version_tuple = (28, 4, 1)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
sqlglot/dialects/bigquery.py
CHANGED
|
@@ -12,7 +12,6 @@ from sqlglot._typing import E
|
|
|
12
12
|
from sqlglot.dialects.dialect import (
|
|
13
13
|
Dialect,
|
|
14
14
|
NormalizationStrategy,
|
|
15
|
-
annotate_with_type_lambda,
|
|
16
15
|
arg_max_or_min_no_count,
|
|
17
16
|
binary_from_function,
|
|
18
17
|
date_add_interval_sql,
|
|
@@ -33,10 +32,12 @@ from sqlglot.dialects.dialect import (
|
|
|
33
32
|
unit_to_var,
|
|
34
33
|
strposition_sql,
|
|
35
34
|
groupconcat_sql,
|
|
35
|
+
sha2_digest_sql,
|
|
36
36
|
)
|
|
37
|
+
from sqlglot.generator import unsupported_args
|
|
37
38
|
from sqlglot.helper import seq_get, split_num_words
|
|
38
39
|
from sqlglot.tokens import TokenType
|
|
39
|
-
from sqlglot.
|
|
40
|
+
from sqlglot.typing.bigquery import EXPRESSION_METADATA
|
|
40
41
|
|
|
41
42
|
if t.TYPE_CHECKING:
|
|
42
43
|
from sqlglot._typing import Lit
|
|
@@ -241,10 +242,28 @@ def _build_datetime(args: t.List) -> exp.Func:
|
|
|
241
242
|
return exp.TimestampFromParts.from_arg_list(args)
|
|
242
243
|
|
|
243
244
|
|
|
245
|
+
def build_date_diff(args: t.List) -> exp.Expression:
|
|
246
|
+
expr = exp.DateDiff(
|
|
247
|
+
this=seq_get(args, 0),
|
|
248
|
+
expression=seq_get(args, 1),
|
|
249
|
+
unit=seq_get(args, 2),
|
|
250
|
+
date_part_boundary=True,
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Normalize plain WEEK to WEEK(SUNDAY) to preserve the semantic in the AST to facilitate transpilation
|
|
254
|
+
# This is done post exp.DateDiff construction since the TimeUnit mixin performs canonicalizations in its constructor too
|
|
255
|
+
unit = expr.args.get("unit")
|
|
256
|
+
|
|
257
|
+
if isinstance(unit, exp.Var) and unit.name.upper() == "WEEK":
|
|
258
|
+
expr.set("unit", exp.WeekStart(this=exp.var("SUNDAY")))
|
|
259
|
+
|
|
260
|
+
return expr
|
|
261
|
+
|
|
262
|
+
|
|
244
263
|
def _build_regexp_extract(
|
|
245
264
|
expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
|
|
246
|
-
) -> t.Callable[[t.List], E]:
|
|
247
|
-
def _builder(args: t.List) -> E:
|
|
265
|
+
) -> t.Callable[[t.List, BigQuery], E]:
|
|
266
|
+
def _builder(args: t.List, dialect: BigQuery) -> E:
|
|
248
267
|
try:
|
|
249
268
|
group = re.compile(args[1].name).groups == 1
|
|
250
269
|
except re.error:
|
|
@@ -257,6 +276,11 @@ def _build_regexp_extract(
|
|
|
257
276
|
position=seq_get(args, 2),
|
|
258
277
|
occurrence=seq_get(args, 3),
|
|
259
278
|
group=exp.Literal.number(1) if group else default_group,
|
|
279
|
+
**(
|
|
280
|
+
{"null_if_pos_overflow": dialect.REGEXP_EXTRACT_POSITION_OVERFLOW_RETURNS_NULL}
|
|
281
|
+
if expr_type is exp.RegexpExtract
|
|
282
|
+
else {}
|
|
283
|
+
),
|
|
260
284
|
)
|
|
261
285
|
|
|
262
286
|
return _builder
|
|
@@ -290,59 +314,6 @@ def _str_to_datetime_sql(
|
|
|
290
314
|
return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))
|
|
291
315
|
|
|
292
316
|
|
|
293
|
-
def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
|
|
294
|
-
"""
|
|
295
|
-
Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
|
|
296
|
-
+---------+---------+---------+------------+---------+
|
|
297
|
-
| INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
|
|
298
|
-
+---------+---------+---------+------------+---------+
|
|
299
|
-
| OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
|
|
300
|
-
+---------+---------+---------+------------+---------+
|
|
301
|
-
"""
|
|
302
|
-
self._annotate_args(expression)
|
|
303
|
-
|
|
304
|
-
this: exp.Expression = expression.this
|
|
305
|
-
|
|
306
|
-
self._set_type(
|
|
307
|
-
expression,
|
|
308
|
-
exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
|
|
309
|
-
)
|
|
310
|
-
return expression
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
def _annotate_by_args_with_coerce(self: TypeAnnotator, expression: E) -> E:
|
|
314
|
-
"""
|
|
315
|
-
+------------+------------+------------+-------------+---------+
|
|
316
|
-
| INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
|
|
317
|
-
+------------+------------+------------+-------------+---------+
|
|
318
|
-
| INT64 | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
|
|
319
|
-
| NUMERIC | NUMERIC | NUMERIC | BIGNUMERIC | FLOAT64 |
|
|
320
|
-
| BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | BIGNUMERIC | FLOAT64 |
|
|
321
|
-
| FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 | FLOAT64 |
|
|
322
|
-
+------------+------------+------------+-------------+---------+
|
|
323
|
-
"""
|
|
324
|
-
self._annotate_args(expression)
|
|
325
|
-
|
|
326
|
-
self._set_type(expression, self._maybe_coerce(expression.this.type, expression.expression.type))
|
|
327
|
-
return expression
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
def _annotate_by_args_approx_top(self: TypeAnnotator, expression: exp.ApproxTopK) -> exp.ApproxTopK:
|
|
331
|
-
self._annotate_args(expression)
|
|
332
|
-
|
|
333
|
-
struct_type = exp.DataType(
|
|
334
|
-
this=exp.DataType.Type.STRUCT,
|
|
335
|
-
expressions=[expression.this.type, exp.DataType(this=exp.DataType.Type.BIGINT)],
|
|
336
|
-
nested=True,
|
|
337
|
-
)
|
|
338
|
-
self._set_type(
|
|
339
|
-
expression,
|
|
340
|
-
exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[struct_type], nested=True),
|
|
341
|
-
)
|
|
342
|
-
|
|
343
|
-
return expression
|
|
344
|
-
|
|
345
|
-
|
|
346
317
|
@unsupported_args("ins_cost", "del_cost", "sub_cost")
|
|
347
318
|
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
|
|
348
319
|
max_dist = expression.args.get("max_dist")
|
|
@@ -363,11 +334,11 @@ def _build_levenshtein(args: t.List) -> exp.Levenshtein:
|
|
|
363
334
|
|
|
364
335
|
def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
|
|
365
336
|
def _builder(args: t.List) -> exp.TimeToStr:
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
format=seq_get(args, 0),
|
|
369
|
-
zone=seq_get(args, 2),
|
|
337
|
+
formatted_time = build_formatted_time(exp.TimeToStr, "bigquery")(
|
|
338
|
+
[expr_type(this=seq_get(args, 1)), seq_get(args, 0)]
|
|
370
339
|
)
|
|
340
|
+
formatted_time.set("zone", seq_get(args, 2))
|
|
341
|
+
return formatted_time
|
|
371
342
|
|
|
372
343
|
return _builder
|
|
373
344
|
|
|
@@ -398,44 +369,6 @@ def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -
|
|
|
398
369
|
return sql
|
|
399
370
|
|
|
400
371
|
|
|
401
|
-
def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
|
|
402
|
-
annotated = self._annotate_by_args(expression, "expressions")
|
|
403
|
-
|
|
404
|
-
# Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
|
|
405
|
-
# https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
|
|
406
|
-
if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
|
|
407
|
-
annotated.type = exp.DataType.Type.VARCHAR
|
|
408
|
-
|
|
409
|
-
return annotated
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
|
|
413
|
-
array_args = expression.expressions
|
|
414
|
-
|
|
415
|
-
# BigQuery behaves as follows:
|
|
416
|
-
#
|
|
417
|
-
# SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING>
|
|
418
|
-
# SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
|
|
419
|
-
if (
|
|
420
|
-
len(array_args) == 1
|
|
421
|
-
and isinstance(select := array_args[0].unnest(), exp.Select)
|
|
422
|
-
and (query_type := select.meta.get("query_type")) is not None
|
|
423
|
-
and query_type.is_type(exp.DataType.Type.STRUCT)
|
|
424
|
-
and len(query_type.expressions) == 1
|
|
425
|
-
and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
|
|
426
|
-
and (projection_type := col_def.kind) is not None
|
|
427
|
-
and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
|
|
428
|
-
):
|
|
429
|
-
array_type = exp.DataType(
|
|
430
|
-
this=exp.DataType.Type.ARRAY,
|
|
431
|
-
expressions=[projection_type.copy()],
|
|
432
|
-
nested=True,
|
|
433
|
-
)
|
|
434
|
-
return self._annotate_with_type(expression, array_type)
|
|
435
|
-
|
|
436
|
-
return self._annotate_by_args(expression, "expressions", array=True)
|
|
437
|
-
|
|
438
|
-
|
|
439
372
|
class BigQuery(Dialect):
|
|
440
373
|
WEEK_OFFSET = -1
|
|
441
374
|
UNNEST_COLUMN_ONLY = True
|
|
@@ -444,8 +377,22 @@ class BigQuery(Dialect):
|
|
|
444
377
|
LOG_BASE_FIRST = False
|
|
445
378
|
HEX_LOWERCASE = True
|
|
446
379
|
FORCE_EARLY_ALIAS_REF_EXPANSION = True
|
|
380
|
+
EXPAND_ONLY_GROUP_ALIAS_REF = True
|
|
447
381
|
PRESERVE_ORIGINAL_NAMES = True
|
|
448
382
|
HEX_STRING_IS_INTEGER_TYPE = True
|
|
383
|
+
BYTE_STRING_IS_BYTES_TYPE = True
|
|
384
|
+
UUID_IS_STRING_TYPE = True
|
|
385
|
+
ANNOTATE_ALL_SCOPES = True
|
|
386
|
+
PROJECTION_ALIASES_SHADOW_SOURCE_NAMES = True
|
|
387
|
+
TABLES_REFERENCEABLE_AS_COLUMNS = True
|
|
388
|
+
SUPPORTS_STRUCT_STAR_EXPANSION = True
|
|
389
|
+
EXCLUDES_PSEUDOCOLUMNS_FROM_STAR = True
|
|
390
|
+
QUERY_RESULTS_ARE_STRUCTS = True
|
|
391
|
+
JSON_EXTRACT_SCALAR_SCALAR_ONLY = True
|
|
392
|
+
DEFAULT_NULL_TYPE = exp.DataType.Type.BIGINT
|
|
393
|
+
|
|
394
|
+
# https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#initcap
|
|
395
|
+
INITCAP_DEFAULT_DELIMITER_CHARS = ' \t\n\r\f\v\\[\\](){}/|<>!?@"^#$&~_,.:;*%+\\-'
|
|
449
396
|
|
|
450
397
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
|
|
451
398
|
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
|
|
@@ -455,9 +402,19 @@ class BigQuery(Dialect):
|
|
|
455
402
|
|
|
456
403
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
|
|
457
404
|
TIME_MAPPING = {
|
|
405
|
+
"%x": "%m/%d/%y",
|
|
458
406
|
"%D": "%m/%d/%y",
|
|
459
407
|
"%E6S": "%S.%f",
|
|
460
408
|
"%e": "%-d",
|
|
409
|
+
"%F": "%Y-%m-%d",
|
|
410
|
+
"%T": "%H:%M:%S",
|
|
411
|
+
"%c": "%a %b %e %H:%M:%S %Y",
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
INVERSE_TIME_MAPPING = {
|
|
415
|
+
# Preserve %E6S instead of expanding to %T.%f - since both %E6S & %T.%f are semantically different in BigQuery
|
|
416
|
+
# %E6S is semantically different from %T.%f: %E6S works as a single atomic specifier for seconds with microseconds, while %T.%f expands incorrectly and fails to parse.
|
|
417
|
+
"%H:%M:%S.%f": "%H:%M:%E6S",
|
|
461
418
|
}
|
|
462
419
|
|
|
463
420
|
FORMAT_MAPPING = {
|
|
@@ -480,7 +437,13 @@ class BigQuery(Dialect):
|
|
|
480
437
|
# https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
|
|
481
438
|
# https://cloud.google.com/bigquery/docs/querying-wildcard-tables#scanning_a_range_of_tables_using_table_suffix
|
|
482
439
|
# https://cloud.google.com/bigquery/docs/query-cloud-storage-data#query_the_file_name_pseudo-column
|
|
483
|
-
PSEUDOCOLUMNS = {
|
|
440
|
+
PSEUDOCOLUMNS = {
|
|
441
|
+
"_PARTITIONTIME",
|
|
442
|
+
"_PARTITIONDATE",
|
|
443
|
+
"_TABLE_SUFFIX",
|
|
444
|
+
"_FILE_NAME",
|
|
445
|
+
"_DBT_MAX_PARTITION",
|
|
446
|
+
}
|
|
484
447
|
|
|
485
448
|
# All set operations require either a DISTINCT or ALL specifier
|
|
486
449
|
SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)
|
|
@@ -492,200 +455,16 @@ class BigQuery(Dialect):
|
|
|
492
455
|
}
|
|
493
456
|
COERCES_TO[exp.DataType.Type.DECIMAL] |= {exp.DataType.Type.BIGDECIMAL}
|
|
494
457
|
COERCES_TO[exp.DataType.Type.BIGINT] |= {exp.DataType.Type.BIGDECIMAL}
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
exp.DataType.Type.
|
|
500
|
-
|
|
501
|
-
exp.Ascii,
|
|
502
|
-
exp.BitwiseAndAgg,
|
|
503
|
-
exp.BitwiseOrAgg,
|
|
504
|
-
exp.BitwiseXorAgg,
|
|
505
|
-
exp.BitwiseCount,
|
|
506
|
-
exp.ByteLength,
|
|
507
|
-
exp.DenseRank,
|
|
508
|
-
exp.FarmFingerprint,
|
|
509
|
-
exp.Grouping,
|
|
510
|
-
exp.LaxInt64,
|
|
511
|
-
exp.Length,
|
|
512
|
-
exp.Ntile,
|
|
513
|
-
exp.Rank,
|
|
514
|
-
exp.RangeBucket,
|
|
515
|
-
exp.RegexpInstr,
|
|
516
|
-
exp.RowNumber,
|
|
517
|
-
exp.Unicode,
|
|
518
|
-
},
|
|
519
|
-
exp.DataType.Type.BINARY: {
|
|
520
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BINARY],
|
|
521
|
-
exp.ByteString,
|
|
522
|
-
exp.CodePointsToBytes,
|
|
523
|
-
exp.MD5Digest,
|
|
524
|
-
exp.SHA,
|
|
525
|
-
exp.SHA2,
|
|
526
|
-
exp.Unhex,
|
|
527
|
-
},
|
|
528
|
-
exp.DataType.Type.BOOLEAN: {
|
|
529
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.BOOLEAN],
|
|
530
|
-
exp.IsInf,
|
|
531
|
-
exp.IsNan,
|
|
532
|
-
exp.JSONBool,
|
|
533
|
-
exp.LaxBool,
|
|
534
|
-
},
|
|
535
|
-
exp.DataType.Type.DATE: {
|
|
536
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DATE],
|
|
537
|
-
exp.DateFromUnixDate,
|
|
538
|
-
},
|
|
539
|
-
exp.DataType.Type.DATETIME: {
|
|
540
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DATETIME],
|
|
541
|
-
exp.ParseDatetime,
|
|
542
|
-
exp.TimestampFromParts,
|
|
543
|
-
},
|
|
544
|
-
exp.DataType.Type.DECIMAL: {
|
|
545
|
-
exp.ParseNumeric,
|
|
546
|
-
},
|
|
547
|
-
exp.DataType.Type.BIGDECIMAL: {
|
|
548
|
-
exp.ParseBignumeric,
|
|
549
|
-
},
|
|
550
|
-
exp.DataType.Type.DOUBLE: {
|
|
551
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.DOUBLE],
|
|
552
|
-
exp.Acos,
|
|
553
|
-
exp.Acosh,
|
|
554
|
-
exp.Asin,
|
|
555
|
-
exp.Asinh,
|
|
556
|
-
exp.Atan,
|
|
557
|
-
exp.Atanh,
|
|
558
|
-
exp.Atan2,
|
|
559
|
-
exp.Cbrt,
|
|
560
|
-
exp.Corr,
|
|
561
|
-
exp.Cot,
|
|
562
|
-
exp.CosineDistance,
|
|
563
|
-
exp.Coth,
|
|
564
|
-
exp.CovarPop,
|
|
565
|
-
exp.CovarSamp,
|
|
566
|
-
exp.Csc,
|
|
567
|
-
exp.Csch,
|
|
568
|
-
exp.CumeDist,
|
|
569
|
-
exp.EuclideanDistance,
|
|
570
|
-
exp.Float64,
|
|
571
|
-
exp.LaxFloat64,
|
|
572
|
-
exp.PercentRank,
|
|
573
|
-
exp.Rand,
|
|
574
|
-
exp.Sec,
|
|
575
|
-
exp.Sech,
|
|
576
|
-
exp.Sin,
|
|
577
|
-
exp.Sinh,
|
|
578
|
-
},
|
|
579
|
-
exp.DataType.Type.JSON: {
|
|
580
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.JSON],
|
|
581
|
-
exp.JSONArray,
|
|
582
|
-
exp.JSONArrayAppend,
|
|
583
|
-
exp.JSONArrayInsert,
|
|
584
|
-
exp.JSONObject,
|
|
585
|
-
exp.JSONRemove,
|
|
586
|
-
exp.JSONSet,
|
|
587
|
-
exp.JSONStripNulls,
|
|
588
|
-
},
|
|
589
|
-
exp.DataType.Type.TIME: {
|
|
590
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIME],
|
|
591
|
-
exp.ParseTime,
|
|
592
|
-
exp.TimeFromParts,
|
|
593
|
-
exp.TimeTrunc,
|
|
594
|
-
exp.TsOrDsToTime,
|
|
595
|
-
},
|
|
596
|
-
exp.DataType.Type.VARCHAR: {
|
|
597
|
-
*Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.VARCHAR],
|
|
598
|
-
exp.CodePointsToString,
|
|
599
|
-
exp.Format,
|
|
600
|
-
exp.JSONExtractScalar,
|
|
601
|
-
exp.JSONType,
|
|
602
|
-
exp.LaxString,
|
|
603
|
-
exp.LowerHex,
|
|
604
|
-
exp.Normalize,
|
|
605
|
-
exp.SafeConvertBytesToString,
|
|
606
|
-
exp.Soundex,
|
|
607
|
-
exp.Uuid,
|
|
608
|
-
},
|
|
609
|
-
exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
|
|
610
|
-
}
|
|
611
|
-
TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)
|
|
612
|
-
|
|
613
|
-
ANNOTATORS = {
|
|
614
|
-
**Dialect.ANNOTATORS,
|
|
615
|
-
**{
|
|
616
|
-
expr_type: annotate_with_type_lambda(data_type)
|
|
617
|
-
for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
|
|
618
|
-
for expr_type in expressions
|
|
619
|
-
},
|
|
620
|
-
**{
|
|
621
|
-
expr_type: lambda self, e: _annotate_math_functions(self, e)
|
|
622
|
-
for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
|
|
623
|
-
},
|
|
624
|
-
**{
|
|
625
|
-
expr_type: lambda self, e: self._annotate_by_args(e, "this")
|
|
626
|
-
for expr_type in (
|
|
627
|
-
exp.Abs,
|
|
628
|
-
exp.ArgMax,
|
|
629
|
-
exp.ArgMin,
|
|
630
|
-
exp.DateTrunc,
|
|
631
|
-
exp.DatetimeTrunc,
|
|
632
|
-
exp.FirstValue,
|
|
633
|
-
exp.GroupConcat,
|
|
634
|
-
exp.IgnoreNulls,
|
|
635
|
-
exp.JSONExtract,
|
|
636
|
-
exp.Lead,
|
|
637
|
-
exp.Left,
|
|
638
|
-
exp.Lower,
|
|
639
|
-
exp.NthValue,
|
|
640
|
-
exp.Pad,
|
|
641
|
-
exp.PercentileDisc,
|
|
642
|
-
exp.RegexpExtract,
|
|
643
|
-
exp.RegexpReplace,
|
|
644
|
-
exp.Repeat,
|
|
645
|
-
exp.Replace,
|
|
646
|
-
exp.RespectNulls,
|
|
647
|
-
exp.Reverse,
|
|
648
|
-
exp.Right,
|
|
649
|
-
exp.SafeNegate,
|
|
650
|
-
exp.Sign,
|
|
651
|
-
exp.Substring,
|
|
652
|
-
exp.TimestampTrunc,
|
|
653
|
-
exp.Translate,
|
|
654
|
-
exp.Trim,
|
|
655
|
-
exp.Upper,
|
|
656
|
-
)
|
|
657
|
-
},
|
|
658
|
-
exp.ApproxTopSum: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
659
|
-
exp.ApproxTopK: lambda self, e: _annotate_by_args_approx_top(self, e),
|
|
660
|
-
exp.ApproxQuantiles: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
661
|
-
exp.Array: _annotate_array,
|
|
662
|
-
exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"),
|
|
663
|
-
exp.Concat: _annotate_concat,
|
|
664
|
-
exp.GenerateTimestampArray: lambda self, e: self._annotate_with_type(
|
|
665
|
-
e, exp.DataType.build("ARRAY<TIMESTAMP>", dialect="bigquery")
|
|
666
|
-
),
|
|
667
|
-
exp.JSONExtractArray: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
668
|
-
exp.JSONFormat: lambda self, e: self._annotate_with_type(
|
|
669
|
-
e, exp.DataType.Type.JSON if e.args.get("to_json") else exp.DataType.Type.VARCHAR
|
|
670
|
-
),
|
|
671
|
-
exp.JSONKeysAtDepth: lambda self, e: self._annotate_with_type(
|
|
672
|
-
e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
|
|
673
|
-
),
|
|
674
|
-
exp.JSONValueArray: lambda self, e: self._annotate_with_type(
|
|
675
|
-
e, exp.DataType.build("ARRAY<VARCHAR>", dialect="bigquery")
|
|
676
|
-
),
|
|
677
|
-
exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"),
|
|
678
|
-
exp.PercentileCont: lambda self, e: _annotate_by_args_with_coerce(self, e),
|
|
679
|
-
exp.RegexpExtractAll: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
680
|
-
exp.SafeAdd: lambda self, e: _annotate_by_args_with_coerce(self, e),
|
|
681
|
-
exp.SafeMultiply: lambda self, e: _annotate_by_args_with_coerce(self, e),
|
|
682
|
-
exp.SafeSubtract: lambda self, e: _annotate_by_args_with_coerce(self, e),
|
|
683
|
-
exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
|
|
684
|
-
exp.ToCodePoints: lambda self, e: self._annotate_with_type(
|
|
685
|
-
e, exp.DataType.build("ARRAY<BIGINT>", dialect="bigquery")
|
|
686
|
-
),
|
|
458
|
+
COERCES_TO[exp.DataType.Type.VARCHAR] |= {
|
|
459
|
+
exp.DataType.Type.DATE,
|
|
460
|
+
exp.DataType.Type.DATETIME,
|
|
461
|
+
exp.DataType.Type.TIME,
|
|
462
|
+
exp.DataType.Type.TIMESTAMP,
|
|
463
|
+
exp.DataType.Type.TIMESTAMPTZ,
|
|
687
464
|
}
|
|
688
465
|
|
|
466
|
+
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
|
|
467
|
+
|
|
689
468
|
def normalize_identifier(self, expression: E) -> E:
|
|
690
469
|
if (
|
|
691
470
|
isinstance(expression, exp.Identifier)
|
|
@@ -809,6 +588,7 @@ class BigQuery(Dialect):
|
|
|
809
588
|
"CONTAINS_SUBSTR": _build_contains_substring,
|
|
810
589
|
"DATE": _build_date,
|
|
811
590
|
"DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
|
|
591
|
+
"DATE_DIFF": build_date_diff,
|
|
812
592
|
"DATE_SUB": build_date_delta_with_interval(exp.DateSub),
|
|
813
593
|
"DATE_TRUNC": lambda args: exp.DateTrunc(
|
|
814
594
|
unit=seq_get(args, 1),
|
|
@@ -822,6 +602,12 @@ class BigQuery(Dialect):
|
|
|
822
602
|
"EDIT_DISTANCE": _build_levenshtein,
|
|
823
603
|
"FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
|
|
824
604
|
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
|
|
605
|
+
"GREATEST": lambda args: exp.Greatest(
|
|
606
|
+
this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
|
|
607
|
+
),
|
|
608
|
+
"LEAST": lambda args: exp.Least(
|
|
609
|
+
this=seq_get(args, 0), expressions=args[1:], null_if_any_null=True
|
|
610
|
+
),
|
|
825
611
|
"JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
|
|
826
612
|
"JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
|
|
827
613
|
"JSON_EXTRACT_STRING_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
|
|
@@ -833,6 +619,7 @@ class BigQuery(Dialect):
|
|
|
833
619
|
"JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
|
|
834
620
|
"LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
|
|
835
621
|
"MD5": exp.MD5Digest.from_arg_list,
|
|
622
|
+
"SHA1": exp.SHA1Digest.from_arg_list,
|
|
836
623
|
"NORMALIZE_AND_CASEFOLD": lambda args: exp.Normalize(
|
|
837
624
|
this=seq_get(args, 0), form=seq_get(args, 1), is_casefold=True
|
|
838
625
|
),
|
|
@@ -854,7 +641,9 @@ class BigQuery(Dialect):
|
|
|
854
641
|
"REGEXP_EXTRACT_ALL": _build_regexp_extract(
|
|
855
642
|
exp.RegexpExtractAll, default_group=exp.Literal.number(0)
|
|
856
643
|
),
|
|
857
|
-
"SHA256": lambda args: exp.
|
|
644
|
+
"SHA256": lambda args: exp.SHA2Digest(
|
|
645
|
+
this=seq_get(args, 0), length=exp.Literal.number(256)
|
|
646
|
+
),
|
|
858
647
|
"SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
|
|
859
648
|
"SPLIT": lambda args: exp.Split(
|
|
860
649
|
# https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
|
|
@@ -890,7 +679,11 @@ class BigQuery(Dialect):
|
|
|
890
679
|
|
|
891
680
|
FUNCTION_PARSERS = {
|
|
892
681
|
**parser.Parser.FUNCTION_PARSERS,
|
|
893
|
-
"ARRAY": lambda self: self.expression(
|
|
682
|
+
"ARRAY": lambda self: self.expression(
|
|
683
|
+
exp.Array,
|
|
684
|
+
expressions=[self._parse_statement()],
|
|
685
|
+
struct_name_inheritance=True,
|
|
686
|
+
),
|
|
894
687
|
"JSON_ARRAY": lambda self: self.expression(
|
|
895
688
|
exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
|
|
896
689
|
),
|
|
@@ -1126,6 +919,9 @@ class BigQuery(Dialect):
|
|
|
1126
919
|
) -> t.Optional[exp.Expression]:
|
|
1127
920
|
bracket = super()._parse_bracket(this)
|
|
1128
921
|
|
|
922
|
+
if isinstance(bracket, exp.Array):
|
|
923
|
+
bracket.set("struct_name_inheritance", True)
|
|
924
|
+
|
|
1129
925
|
if this is bracket:
|
|
1130
926
|
return bracket
|
|
1131
927
|
|
|
@@ -1272,6 +1068,24 @@ class BigQuery(Dialect):
|
|
|
1272
1068
|
this=self._match_text_seq("AS") and self._parse_select(),
|
|
1273
1069
|
)
|
|
1274
1070
|
|
|
1071
|
+
def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
|
|
1072
|
+
this = super()._parse_column_ops(this)
|
|
1073
|
+
|
|
1074
|
+
if isinstance(this, exp.Dot):
|
|
1075
|
+
prefix_name = this.this.name.upper()
|
|
1076
|
+
func_name = this.name.upper()
|
|
1077
|
+
if prefix_name == "NET":
|
|
1078
|
+
if func_name == "HOST":
|
|
1079
|
+
this = self.expression(
|
|
1080
|
+
exp.NetHost, this=seq_get(this.expression.expressions, 0)
|
|
1081
|
+
)
|
|
1082
|
+
elif prefix_name == "SAFE":
|
|
1083
|
+
if func_name == "TIMESTAMP":
|
|
1084
|
+
this = _build_timestamp(this.expression.expressions)
|
|
1085
|
+
this.set("safe", True)
|
|
1086
|
+
|
|
1087
|
+
return this
|
|
1088
|
+
|
|
1275
1089
|
class Generator(generator.Generator):
|
|
1276
1090
|
INTERVAL_ALLOWS_PLURAL_FORM = False
|
|
1277
1091
|
JOIN_HINTS = False
|
|
@@ -1347,7 +1161,7 @@ class BigQuery(Dialect):
|
|
|
1347
1161
|
),
|
|
1348
1162
|
exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
|
|
1349
1163
|
exp.GroupConcat: lambda self, e: groupconcat_sql(
|
|
1350
|
-
self, e, func_name="STRING_AGG", within_group=False
|
|
1164
|
+
self, e, func_name="STRING_AGG", within_group=False, sep=None
|
|
1351
1165
|
),
|
|
1352
1166
|
exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
|
|
1353
1167
|
exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
|
|
@@ -1406,6 +1220,8 @@ class BigQuery(Dialect):
|
|
|
1406
1220
|
),
|
|
1407
1221
|
exp.SHA: rename_func("SHA1"),
|
|
1408
1222
|
exp.SHA2: sha256_sql,
|
|
1223
|
+
exp.SHA1Digest: rename_func("SHA1"),
|
|
1224
|
+
exp.SHA2Digest: sha2_digest_sql,
|
|
1409
1225
|
exp.StabilityProperty: lambda self, e: (
|
|
1410
1226
|
"DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
|
|
1411
1227
|
),
|
|
@@ -1417,6 +1233,7 @@ class BigQuery(Dialect):
|
|
|
1417
1233
|
),
|
|
1418
1234
|
exp.StrToDate: _str_to_datetime_sql,
|
|
1419
1235
|
exp.StrToTime: _str_to_datetime_sql,
|
|
1236
|
+
exp.SessionUser: lambda *_: "SESSION_USER()",
|
|
1420
1237
|
exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
|
|
1421
1238
|
exp.TimeFromParts: rename_func("TIME"),
|
|
1422
1239
|
exp.TimestampFromParts: rename_func("DATETIME"),
|
|
@@ -1734,3 +1551,7 @@ class BigQuery(Dialect):
|
|
|
1734
1551
|
kind = f" {kind}" if kind else ""
|
|
1735
1552
|
|
|
1736
1553
|
return f"{variables}{kind}{default}"
|
|
1554
|
+
|
|
1555
|
+
def timestamp_sql(self, expression: exp.Timestamp) -> str:
|
|
1556
|
+
prefix = "SAFE." if expression.args.get("safe") else ""
|
|
1557
|
+
return self.func(f"{prefix}TIMESTAMP", expression.this, expression.args.get("zone"))
|