sqlglot 26.30.0__py3-none-any.whl → 26.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1165,3 +1165,48 @@ class DuckDB(Dialect):
1165
1165
def autoincrementcolumnconstraint_sql(self, _) -> str:
    """Drop AUTOINCREMENT column constraints, which DuckDB does not support."""
    self.unsupported("The AUTOINCREMENT column constraint is not supported by DuckDB")
    return ""
1168
+
1169
def aliases_sql(self, expression: exp.Aliases) -> str:
    """Generate SQL for an aliased expression, special-casing POSEXPLODE."""
    aliased = expression.this
    if not isinstance(aliased, exp.Posexplode):
        return super().aliases_sql(expression)

    # POSEXPLODE consumes the parent's aliases itself, so delegate fully.
    return self.posexplode_sql(aliased)
1175
+
1176
def posexplode_sql(self, expression: exp.Posexplode) -> str:
    """Transpile Spark's POSEXPLODE into UNNEST + GENERATE_SUBSCRIPTS for DuckDB."""
    array_expr = expression.this
    parent_node = expression.parent

    # Spark's implicit output aliases are "pos" and "col", unless overridden
    pos_alias = exp.to_identifier("pos")
    col_alias = exp.to_identifier("col")

    if isinstance(parent_node, exp.Aliases):
        # Column form: SELECT POSEXPLODE(col) [AS (a, b)]
        pos_alias, col_alias = parent_node.expressions
    elif isinstance(parent_node, exp.Table):
        # Table form: SELECT * FROM POSEXPLODE(col) [AS (a, b)]
        table_alias = parent_node.args.get("alias")
        if table_alias:
            pos_alias, col_alias = table_alias.columns or [pos_alias, col_alias]
            # The alias is folded into the generated projection, so detach it.
            table_alias.pop()

    # Spark positions are 0-based while GENERATE_SUBSCRIPTS is 1-based,
    # hence the trailing "- 1".
    unnest_sql = self.sql(exp.Unnest(expressions=[array_expr], alias=col_alias))
    subscripts_call = exp.Anonymous(
        this="GENERATE_SUBSCRIPTS", expressions=[array_expr, exp.Literal.number(1)]
    )
    gen_subscripts = self.sql(
        exp.Alias(this=subscripts_call - exp.Literal.number(1), alias=pos_alias)
    )

    projection = self.format_args(gen_subscripts, unnest_sql)

    in_from_clause = isinstance(parent_node, exp.From) or (
        parent_node and isinstance(parent_node.parent, exp.From)
    )
    if in_from_clause:
        # FROM POSEXPLODE(col) -> FROM (SELECT GENERATE_SUBSCRIPTS(...), UNNEST(...))
        return self.sql(exp.Subquery(this=exp.Select(expressions=[projection])))

    return projection
@@ -0,0 +1,89 @@
1
from __future__ import annotations
from sqlglot import exp, generator, parser
from sqlglot.dialects.dialect import Dialect, rename_func, binary_from_function
from sqlglot.helper import seq_get
from sqlglot.generator import unsupported_args


class Exasol(Dialect):
    """Dialect for the Exasol analytics database."""

    class Parser(parser.Parser):
        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BIT_AND": binary_from_function(exp.BitwiseAnd),
            "BIT_OR": binary_from_function(exp.BitwiseOr),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BIT_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BIT_LSHIFT": binary_from_function(exp.BitwiseLeftShift),
            "BIT_RSHIFT": binary_from_function(exp.BitwiseRightShift),
            "EVERY": lambda args: exp.All(this=seq_get(args, 0)),
            "EDIT_DISTANCE": exp.Levenshtein.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                position=seq_get(args, 3),
                occurrence=seq_get(args, 4),
            ),
        }

    class Generator(generator.Generator):
        # Exasol stores all character/binary data as VARCHAR variants
        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypedetails.htm#StringDataType
        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "VARCHAR",
            exp.DataType.Type.LONGBLOB: "VARCHAR",
            exp.DataType.Type.LONGTEXT: "VARCHAR",
            exp.DataType.Type.MEDIUMBLOB: "VARCHAR",
            exp.DataType.Type.MEDIUMTEXT: "VARCHAR",
            exp.DataType.Type.TINYBLOB: "VARCHAR",
            exp.DataType.Type.TINYTEXT: "VARCHAR",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.VARBINARY: "VARCHAR",
        }

        # https://docs.exasol.com/db/latest/sql_references/data_types/datatypealiases.htm
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.TINYINT: "SMALLINT",
            exp.DataType.Type.MEDIUMINT: "INT",
            exp.DataType.Type.DECIMAL32: "DECIMAL",
            exp.DataType.Type.DECIMAL64: "DECIMAL",
            exp.DataType.Type.DECIMAL128: "DECIMAL",
            exp.DataType.Type.DECIMAL256: "DECIMAL",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
        }

        def datatype_sql(self, expression: exp.DataType) -> str:
            """Render data types, normalizing TIMESTAMPLTZ to Exasol's fixed form.

            Exasol supports a fixed default precision of 3 for TIMESTAMP WITH
            LOCAL TIME ZONE and does not allow specifying a custom precision.
            """
            if expression.is_type(exp.DataType.Type.TIMESTAMPLTZ):
                return "TIMESTAMP WITH LOCAL TIME ZONE"

            return super().datatype_sql(expression)

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/every.htm
            exp.All: rename_func("EVERY"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_and.htm
            exp.BitwiseAnd: rename_func("BIT_AND"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_or.htm
            exp.BitwiseOr: rename_func("BIT_OR"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_not.htm
            exp.BitwiseNot: rename_func("BIT_NOT"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_lshift.htm
            exp.BitwiseLeftShift: rename_func("BIT_LSHIFT"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_rshift.htm
            exp.BitwiseRightShift: rename_func("BIT_RSHIFT"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/bit_xor.htm
            exp.BitwiseXor: rename_func("BIT_XOR"),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/edit_distance.htm#EDIT_DISTANCE
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("EDIT_DISTANCE")
            ),
            # https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/mod.htm
            exp.Mod: rename_func("MOD"),
            exp.RegexpReplace: unsupported_args("modifiers")(rename_func("REGEXP_REPLACE")),
        }
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  from sqlglot import exp
4
4
  from sqlglot.dialects.dialect import NormalizationStrategy
5
5
  from sqlglot.dialects.tsql import TSQL
6
+ from sqlglot.tokens import TokenType
6
7
 
7
8
 
8
9
  class Fabric(TSQL):
@@ -28,61 +29,87 @@ class Fabric(TSQL):
28
29
  # Fabric is case-sensitive unlike T-SQL which is case-insensitive
29
30
  NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE
30
31
 
32
class Tokenizer(TSQL.Tokenizer):
    # T-SQL treats TIMESTAMP as a synonym for ROWVERSION, but Fabric wants it
    # tokenized as a datetime type. UTINYINT is also registered here because
    # the T-SQL tokenizer has no mapping for it.
    KEYWORDS = {
        **TSQL.Tokenizer.KEYWORDS,
        "TIMESTAMP": TokenType.TIMESTAMP,
        "UTINYINT": TokenType.UTINYINT,
    }
41
+
31
42
class Generator(TSQL.Generator):
    # Fabric-specific type mappings - override T-SQL types that aren't supported
    # Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
    TYPE_MAPPING = {
        **TSQL.Generator.TYPE_MAPPING,
        exp.DataType.Type.DATETIME: "DATETIME2",
        exp.DataType.Type.DECIMAL: "DECIMAL",
        exp.DataType.Type.IMAGE: "VARBINARY",
        exp.DataType.Type.INT: "INT",
        exp.DataType.Type.JSON: "VARCHAR",
        exp.DataType.Type.MONEY: "DECIMAL",
        exp.DataType.Type.NCHAR: "CHAR",
        exp.DataType.Type.NVARCHAR: "VARCHAR",
        exp.DataType.Type.ROWVERSION: "ROWVERSION",
        exp.DataType.Type.SMALLDATETIME: "DATETIME2",
        exp.DataType.Type.SMALLMONEY: "DECIMAL",
        exp.DataType.Type.TIMESTAMP: "DATETIME2",
        exp.DataType.Type.TIMESTAMPNTZ: "DATETIME2",
        exp.DataType.Type.TIMESTAMPTZ: "DATETIMEOFFSET",
        exp.DataType.Type.TINYINT: "SMALLINT",
        exp.DataType.Type.UTINYINT: "SMALLINT",
        exp.DataType.Type.UUID: "VARBINARY(MAX)",
        exp.DataType.Type.XML: "VARCHAR",
    }

    def datatype_sql(self, expression: exp.DataType) -> str:
        """Render data types, capping temporal precision for Fabric.

        Fabric limits temporal types to max 6 digits of precision; when no
        precision is specified (or it is not an integer literal), default to 6.
        DATE carries no precision, so it is excluded.
        """
        if (
            expression.is_type(*exp.DataType.TEMPORAL_TYPES)
            and expression.this != exp.DataType.Type.DATE
        ):
            # First DataTypeParam, if any, holds the declared precision
            precision_param = expression.find(exp.DataTypeParam)
            target_precision = 6

            if precision_param and precision_param.this.is_int:
                # Cap an explicit integer precision at 6
                target_precision = min(precision_param.this.to_py(), 6)

            # Rebuild the type with the normalized precision
            expression = exp.DataType(
                this=expression.this,
                expressions=[exp.DataTypeParam(this=exp.Literal.number(target_precision))],
            )

        return super().datatype_sql(expression)

    def unixtotime_sql(self, expression: exp.UnixToTime) -> str:
        """Render UnixToTime as DATEADD from the unix epoch (seconds scale only)."""
        scale = expression.args.get("scale")
        timestamp = expression.this

        if scale not in (None, exp.UnixToTime.SECONDS):
            self.unsupported(f"UnixToTime scale {scale} is not supported by Fabric")
            return ""

        # Convert unix timestamp (seconds) to microseconds and round to avoid decimals
        microseconds = timestamp * exp.Literal.number("1e6")
        rounded = exp.func("round", microseconds, 0)
        rounded_ms_as_bigint = exp.cast(rounded, exp.DataType.Type.BIGINT)

        # Create the base datetime as '1970-01-01' cast to DATETIME2(6)
        epoch_start = exp.cast("'1970-01-01'", "datetime2(6)", dialect="fabric")

        dateadd = exp.DateAdd(
            this=epoch_start,
            expression=rounded_ms_as_bigint,
            unit=exp.Literal.string("MICROSECONDS"),
        )
        return self.sql(dateadd)
@@ -8,6 +8,7 @@ from sqlglot.dialects.dialect import (
8
8
  NormalizationStrategy,
9
9
  binary_from_function,
10
10
  bool_xor_sql,
11
+ build_replace_with_optional_replacement,
11
12
  date_trunc_to_time,
12
13
  datestrtodate_sql,
13
14
  encode_decode_sql,
@@ -30,6 +31,7 @@ from sqlglot.dialects.dialect import (
30
31
  sequence_sql,
31
32
  build_regexp_extract,
32
33
  explode_to_unnest_sql,
34
+ space_sql,
33
35
  )
34
36
  from sqlglot.dialects.hive import Hive
35
37
  from sqlglot.dialects.mysql import MySQL
@@ -360,6 +362,7 @@ class Presto(Dialect):
360
362
  expression=seq_get(args, 1),
361
363
  replacement=seq_get(args, 2) or exp.Literal.string(""),
362
364
  ),
365
+ "REPLACE": build_replace_with_optional_replacement,
363
366
  "ROW": exp.Struct.from_arg_list,
364
367
  "SEQUENCE": exp.GenerateSeries.from_arg_list,
365
368
  "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
@@ -367,6 +370,7 @@ class Presto(Dialect):
367
370
  "STRPOS": lambda args: exp.StrPosition(
368
371
  this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
369
372
  ),
373
+ "SLICE": exp.ArraySlice.from_arg_list,
370
374
  "TO_CHAR": _build_to_char,
371
375
  "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
372
376
  "TO_UTF8": lambda args: exp.Encode(
@@ -435,6 +439,7 @@ class Presto(Dialect):
435
439
  exp.ArrayContains: rename_func("CONTAINS"),
436
440
  exp.ArrayToString: rename_func("ARRAY_JOIN"),
437
441
  exp.ArrayUniqueAgg: rename_func("SET_AGG"),
442
+ exp.ArraySlice: rename_func("SLICE"),
438
443
  exp.AtTimeZone: rename_func("AT_TIMEZONE"),
439
444
  exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
440
445
  exp.BitwiseLeftShift: lambda self, e: self.func(
@@ -501,6 +506,7 @@ class Presto(Dialect):
501
506
  amend_exploded_column_table,
502
507
  ]
503
508
  ),
509
+ exp.Space: space_sql,
504
510
  exp.SortArray: _no_sort_array,
505
511
  exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
506
512
  exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
@@ -213,8 +213,7 @@ class Redshift(Postgres):
213
213
  exp.TableSample: no_tablesample_sql,
214
214
  exp.TsOrDsAdd: date_delta_sql("DATEADD"),
215
215
  exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
216
- exp.UnixToTime: lambda self,
217
- e: f"(TIMESTAMP 'epoch' + {self.sql(e.this)} * INTERVAL '1 SECOND')",
216
+ exp.UnixToTime: lambda self, e: self._unix_to_time_sql(e),
218
217
  }
219
218
 
220
219
  # Postgres maps exp.Pivot to no_pivot_sql, but Redshift support pivots
@@ -447,3 +446,12 @@ class Redshift(Postgres):
447
446
def explode_sql(self, expression: exp.Explode) -> str:
    """Redshift has no EXPLODE(); warn and generate nothing."""
    self.unsupported("Unsupported EXPLODE() function")
    return ""
449
+
450
def _unix_to_time_sql(self, expression: exp.UnixToTime) -> str:
    """Render UnixToTime as epoch arithmetic, dividing sub-second scales down."""
    scale = expression.args.get("scale")
    value_sql = self.sql(expression.this)

    # NOTE(review): a scale that is not an integer literal is silently
    # ignored here — confirm that is the intended fallback.
    if scale is not None and scale != exp.UnixToTime.SECONDS and scale.is_int:
        value_sql = f"({value_sql} / POWER(10, {scale.to_py()}))"

    return f"(TIMESTAMP 'epoch' + {value_sql} * INTERVAL '1 SECOND')"
@@ -9,6 +9,7 @@ from sqlglot.dialects.dialect import (
9
9
  build_timetostr_or_tochar,
10
10
  binary_from_function,
11
11
  build_default_decimal_type,
12
+ build_replace_with_optional_replacement,
12
13
  build_timestamp_from_parts,
13
14
  date_delta_sql,
14
15
  date_trunc_to_time,
@@ -484,6 +485,7 @@ class Snowflake(Dialect):
484
485
  "REGEXP_REPLACE": _build_regexp_replace,
485
486
  "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
486
487
  "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
488
+ "REPLACE": build_replace_with_optional_replacement,
487
489
  "RLIKE": exp.RegexpLike.from_arg_list,
488
490
  "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
489
491
  "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
@@ -1416,7 +1418,7 @@ class Snowflake(Dialect):
1416
1418
 
1417
1419
  def timetostr_sql(self, expression: exp.TimeToStr) -> str:
1418
1420
  this = expression.this
1419
- if not isinstance(this, exp.TsOrDsToTimestamp):
1421
+ if this.is_string:
1420
1422
  this = exp.cast(this, exp.DataType.Type.TIMESTAMP)
1421
1423
 
1422
1424
  return self.func("TO_CHAR", this, self.format_time(expression))
@@ -201,6 +201,7 @@ class Spark2(Hive):
201
201
  "SHIFTLEFT": binary_from_function(exp.BitwiseLeftShift),
202
202
  "SHIFTRIGHT": binary_from_function(exp.BitwiseRightShift),
203
203
  "STRING": _build_as_cast("string"),
204
+ "SLICE": exp.ArraySlice.from_arg_list,
204
205
  "TIMESTAMP": _build_as_cast("timestamp"),
205
206
  "TO_TIMESTAMP": lambda args: (
206
207
  _build_as_cast("timestamp")(args)
@@ -261,6 +262,7 @@ class Spark2(Hive):
261
262
  exp.ArraySum: lambda self,
262
263
  e: f"AGGREGATE({self.sql(e, 'this')}, 0, (acc, x) -> acc + x, acc -> acc)",
263
264
  exp.ArrayToString: rename_func("ARRAY_JOIN"),
265
+ exp.ArraySlice: rename_func("SLICE"),
264
266
  exp.AtTimeZone: lambda self, e: self.func(
265
267
  "FROM_UTC_TIMESTAMP", e.this, e.args.get("zone")
266
268
  ),
sqlglot/dialects/tsql.py CHANGED
@@ -612,6 +612,7 @@ class TSQL(Dialect):
612
612
  "SYSDATETIME": exp.CurrentTimestamp.from_arg_list,
613
613
  "SUSER_NAME": exp.CurrentUser.from_arg_list,
614
614
  "SUSER_SNAME": exp.CurrentUser.from_arg_list,
615
+ "SYSDATETIMEOFFSET": exp.CurrentTimestampLTZ.from_arg_list,
615
616
  "SYSTEM_USER": exp.CurrentUser.from_arg_list,
616
617
  "TIMEFROMPARTS": _build_timefromparts,
617
618
  "DATETRUNC": _build_datetrunc,
@@ -1020,6 +1021,7 @@ class TSQL(Dialect):
1020
1021
  exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
1021
1022
  exp.CurrentDate: rename_func("GETDATE"),
1022
1023
  exp.CurrentTimestamp: rename_func("GETDATE"),
1024
+ exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),
1023
1025
  exp.DateStrToDate: datestrtodate_sql,
1024
1026
  exp.Extract: rename_func("DATEPART"),
1025
1027
  exp.GeneratedAsIdentityColumnConstraint: generatedasidentitycolumnconstraint_sql,
@@ -1249,15 +1251,15 @@ class TSQL(Dialect):
1249
1251
  sql_with_ctes = self.prepend_ctes(expression, sql)
1250
1252
  sql_literal = self.sql(exp.Literal.string(sql_with_ctes))
1251
1253
  if kind == "SCHEMA":
1252
- return f"""IF NOT EXISTS (SELECT * FROM information_schema.schemata WHERE schema_name = {identifier}) EXEC({sql_literal})"""
1254
+ return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = {identifier}) EXEC({sql_literal})"""
1253
1255
  elif kind == "TABLE":
1254
1256
  assert table
1255
1257
  where = exp.and_(
1256
- exp.column("table_name").eq(table.name),
1257
- exp.column("table_schema").eq(table.db) if table.db else None,
1258
- exp.column("table_catalog").eq(table.catalog) if table.catalog else None,
1258
+ exp.column("TABLE_NAME").eq(table.name),
1259
+ exp.column("TABLE_SCHEMA").eq(table.db) if table.db else None,
1260
+ exp.column("TABLE_CATALOG").eq(table.catalog) if table.catalog else None,
1259
1261
  )
1260
- return f"""IF NOT EXISTS (SELECT * FROM information_schema.tables WHERE {where}) EXEC({sql_literal})"""
1262
+ return f"""IF NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE {where}) EXEC({sql_literal})"""
1261
1263
  elif kind == "INDEX":
1262
1264
  index = self.sql(exp.Literal.string(expression.this.text("this")))
1263
1265
  return f"""IF NOT EXISTS (SELECT * FROM sys.indexes WHERE object_id = object_id({identifier}) AND name = {index}) EXEC({sql_literal})"""
sqlglot/expressions.py CHANGED
@@ -5569,6 +5569,22 @@ class ArrayFilter(Func):
5569
5569
  _sql_names = ["FILTER", "ARRAY_FILTER"]
5570
5570
 
5571
5571
 
5572
+ class ArrayFirst(Func):
5573
+ pass
5574
+
5575
+
5576
+ class ArrayLast(Func):
5577
+ pass
5578
+
5579
+
5580
+ class ArrayReverse(Func):
5581
+ pass
5582
+
5583
+
5584
+ class ArraySlice(Func):
5585
+ arg_types = {"this": True, "start": True, "end": False, "step": False}
5586
+
5587
+
5572
5588
  class ArrayToString(Func):
5573
5589
  arg_types = {"this": True, "expression": True, "null": False}
5574
5590
  _sql_names = ["ARRAY_TO_STRING", "ARRAY_JOIN"]
@@ -5806,6 +5822,10 @@ class CurrentTimestamp(Func):
5806
5822
  arg_types = {"this": False, "sysdate": False}
5807
5823
 
5808
5824
 
5825
+ class CurrentTimestampLTZ(Func):
5826
+ arg_types = {}
5827
+
5828
+
5809
5829
  class CurrentSchema(Func):
5810
5830
  arg_types = {"this": False}
5811
5831
 
@@ -5846,8 +5866,6 @@ class DateTrunc(Func):
5846
5866
  unit_name = TimeUnit.UNABBREVIATED_UNIT_NAME[unit_name]
5847
5867
 
5848
5868
  args["unit"] = Literal.string(unit_name)
5849
- elif isinstance(unit, Week):
5850
- unit.set("this", Literal.string(unit.this.name.upper()))
5851
5869
 
5852
5870
  super().__init__(**args)
5853
5871
 
@@ -6669,6 +6687,11 @@ class Repeat(Func):
6669
6687
  arg_types = {"this": True, "times": True}
6670
6688
 
6671
6689
 
6690
+ # Some dialects like Snowflake support two argument replace
6691
+ class Replace(Func):
6692
+ arg_types = {"this": True, "expression": True, "replacement": False}
6693
+
6694
+
6672
6695
  # https://learn.microsoft.com/en-us/sql/t-sql/functions/round-transact-sql?view=sql-server-ver16
6673
6696
  # tsql third argument function == trunctaion if not 0
6674
6697
  class Round(Func):
@@ -6716,6 +6739,17 @@ class Substring(Func):
6716
6739
  arg_types = {"this": True, "start": False, "length": False}
6717
6740
 
6718
6741
 
6742
+ class SubstringIndex(Func):
6743
+ """
6744
+ SUBSTRING_INDEX(str, delim, count)
6745
+
6746
+ *count* > 0 → left slice before the *count*-th delimiter
6747
+ *count* < 0 → right slice after the |count|-th delimiter
6748
+ """
6749
+
6750
+ arg_types = {"this": True, "delimiter": True, "count": True}
6751
+
6752
+
6719
6753
  class StandardHash(Func):
6720
6754
  arg_types = {"this": True, "expression": False}
6721
6755
 
@@ -6772,6 +6806,14 @@ class FromBase(Func):
6772
6806
  arg_types = {"this": True, "expression": True}
6773
6807
 
6774
6808
 
6809
+ class Space(Func):
6810
+ """
6811
+ SPACE(n) → string consisting of n blank characters
6812
+ """
6813
+
6814
+ pass
6815
+
6816
+
6775
6817
  class Struct(Func):
6776
6818
  arg_types = {"expressions": False}
6777
6819
  is_var_len_args = True
sqlglot/generator.py CHANGED
@@ -3480,7 +3480,7 @@ class Generator(metaclass=_Generator):
3480
3480
 
3481
3481
  actions_list.append(action_sql)
3482
3482
 
3483
- actions_sql = self.format_args(*actions_list)
3483
+ actions_sql = self.format_args(*actions_list).lstrip("\n")
3484
3484
 
3485
3485
  exists = " IF EXISTS" if expression.args.get("exists") else ""
3486
3486
  on_cluster = self.sql(expression, "cluster")
@@ -3491,7 +3491,7 @@ class Generator(metaclass=_Generator):
3491
3491
  kind = self.sql(expression, "kind")
3492
3492
  not_valid = " NOT VALID" if expression.args.get("not_valid") else ""
3493
3493
 
3494
- return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster} {actions_sql}{not_valid}{options}"
3494
+ return f"ALTER {kind}{exists}{only} {self.sql(expression, 'this')}{on_cluster}{self.sep()}{actions_sql}{not_valid}{options}"
3495
3495
 
3496
3496
  def add_column_sql(self, expression: exp.Expression) -> str:
3497
3497
  sql = self.sql(expression)
@@ -3510,7 +3510,7 @@ class Generator(metaclass=_Generator):
3510
3510
  return f"DROP{exists}{expressions}"
3511
3511
 
3512
3512
  def addconstraint_sql(self, expression: exp.AddConstraint) -> str:
3513
- return f"ADD {self.expressions(expression)}"
3513
+ return f"ADD {self.expressions(expression, indent=False)}"
3514
3514
 
3515
3515
  def addpartition_sql(self, expression: exp.AddPartition) -> str:
3516
3516
  exists = "IF NOT EXISTS " if expression.args.get("exists") else ""
sqlglot/jsonpath.py CHANGED
@@ -41,7 +41,7 @@ def parse(path: str, dialect: DialectType = None) -> exp.JSONPath:
41
41
  """Takes in a JSON path string and parses it into a JSONPath expression."""
42
42
  from sqlglot.dialects import Dialect
43
43
 
44
- jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer
44
+ jsonpath_tokenizer = Dialect.get_or_raise(dialect).jsonpath_tokenizer()
45
45
  tokens = jsonpath_tokenizer.tokenize(path)
46
46
  size = len(tokens)
47
47
 
@@ -329,6 +329,7 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
329
329
  ],
330
330
  nested=True,
331
331
  )
332
+
332
333
  if not any(
333
334
  cd.kind.is_type(exp.DataType.Type.UNKNOWN)
334
335
  for cd in struct_type.expressions
@@ -630,3 +631,15 @@ class TypeAnnotator(metaclass=_TypeAnnotator):
630
631
  else:
631
632
  self._set_type(expression, exp.DataType.Type.INT)
632
633
  return expression
634
+
635
+ def _annotate_by_array_element(self, expression: exp.Expression) -> exp.Expression:
636
+ self._annotate_args(expression)
637
+
638
+ array_arg = expression.this
639
+ if array_arg.type.is_type(exp.DataType.Type.ARRAY):
640
+ element_type = seq_get(array_arg.type.expressions, 0) or exp.DataType.Type.UNKNOWN
641
+ self._set_type(expression, element_type)
642
+ else:
643
+ self._set_type(expression, exp.DataType.Type.UNKNOWN)
644
+
645
+ return expression
@@ -21,12 +21,13 @@ def pushdown_predicates(expression, dialect=None):
21
21
  Returns:
22
22
  sqlglot.Expression: optimized expression
23
23
  """
24
+ from sqlglot.dialects.athena import Athena
24
25
  from sqlglot.dialects.presto import Presto
25
26
 
26
27
  root = build_scope(expression)
27
28
 
28
29
  dialect = Dialect.get_or_raise(dialect)
29
- unnest_requires_cross_join = isinstance(dialect, Presto)
30
+ unnest_requires_cross_join = isinstance(dialect, (Athena, Presto))
30
31
 
31
32
  if root:
32
33
  scope_ref_count = root.ref_count()
@@ -358,7 +358,7 @@ class Scope:
358
358
  for expression in itertools.chain(self.derived_tables, self.udtfs):
359
359
  self._references.append(
360
360
  (
361
- expression.alias,
361
+ _get_source_alias(expression),
362
362
  expression if expression.args.get("pivots") else expression.unnest(),
363
363
  )
364
364
  )
@@ -785,7 +785,7 @@ def _traverse_tables(scope):
785
785
  # This shouldn't be a problem once qualify_columns runs, as it adds aliases on everything.
786
786
  # Until then, this means that only a single, unaliased derived table is allowed (rather,
787
787
  # the latest one wins.
788
- sources[expression.alias] = child_scope
788
+ sources[_get_source_alias(expression)] = child_scope
789
789
 
790
790
  # append the final child_scope yielded
791
791
  if child_scope:
@@ -825,7 +825,7 @@ def _traverse_udtfs(scope):
825
825
  ):
826
826
  yield child_scope
827
827
  top = child_scope
828
- sources[expression.alias] = child_scope
828
+ sources[_get_source_alias(expression)] = child_scope
829
829
 
830
830
  scope.subquery_scopes.append(top)
831
831
 
@@ -915,3 +915,13 @@ def find_in_scope(expression, expression_types, bfs=True):
915
915
  the criteria was found.
916
916
  """
917
917
  return next(find_all_in_scope(expression, expression_types, bfs=bfs), None)
918
+
919
+
920
def _get_source_alias(expression):
    """Return the source's alias, falling back to a lone TableAlias column name."""
    name = expression.alias
    if name:
        return name

    alias_arg = expression.args.get("alias")
    # A source may carry its name only as the single column of a TableAlias.
    if isinstance(alias_arg, exp.TableAlias) and len(alias_arg.columns) == 1:
        return alias_arg.columns[0].name

    return name
sqlglot/parser.py CHANGED
@@ -1895,7 +1895,7 @@ class Parser(metaclass=_Parser):
1895
1895
  stmt.add_comments(comments, prepend=True)
1896
1896
  return stmt
1897
1897
 
1898
- if self._match_set(self.dialect.tokenizer.COMMANDS):
1898
+ if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1899
1899
  return self._parse_command()
1900
1900
 
1901
1901
  expression = self._parse_expression()
@@ -7362,8 +7362,9 @@ class Parser(metaclass=_Parser):
7362
7362
 
7363
7363
  return None
7364
7364
 
7365
- if not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN or self._match_text_seq(
7366
- "COLUMNS"
7365
+ if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
7366
+ not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
7367
+ or self._match_text_seq("COLUMNS")
7367
7368
  ):
7368
7369
  schema = self._parse_schema()
7369
7370