sqlglotc 28.10.1.dev130__tar.gz → 29.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlglotc
3
- Version: 28.10.1.dev130
3
+ Version: 29.0.1
4
4
  Summary: mypyc-compiled extensions for sqlglot
5
5
  Author-email: Toby Mao <toby.mao@gmail.com>
6
- License: MIT
6
+ License-Expression: MIT
7
7
  Project-URL: Homepage, https://sqlglot.com/
8
8
  Project-URL: Repository, https://github.com/tobymao/sqlglot
9
9
  Requires-Python: >=3.9
@@ -3,7 +3,7 @@ name = "sqlglotc"
3
3
  dynamic = ["version"]
4
4
  description = "mypyc-compiled extensions for sqlglot"
5
5
  authors = [{ name = "Toby Mao", email = "toby.mao@gmail.com" }]
6
- license = {text = "MIT"}
6
+ license = "MIT"
7
7
  requires-python = ">= 3.9"
8
8
 
9
9
  [project.urls]
@@ -25,3 +25,7 @@ local_scheme = "no-local-version"
25
25
  [tool.mypy]
26
26
  # Allow mypyc to resolve sqlglot.* from the repo root (../sqlglot/) or sdist root (./sqlglot/).
27
27
  mypy_path = [".", ".."]
28
+
29
+ [[tool.mypy.overrides]]
30
+ module = "sqlglot._version"
31
+ ignore_missing_imports = true
@@ -10,7 +10,17 @@ sqlglot_src = os.path.join(here, "..", "sqlglot")
10
10
 
11
11
  from mypyc.build import mypycify
12
12
 
13
- SOURCE_FILES = ["expression_core.py", "helper.py", "trie.py", "tokenizer_core.py"]
13
+ SOURCE_FILES = [
14
+ "errors.py",
15
+ "expression_core.py",
16
+ "helper.py",
17
+ "parser_core.py",
18
+ "schema.py",
19
+ "serde.py",
20
+ "time.py",
21
+ "tokenizer_core.py",
22
+ "trie.py",
23
+ ]
14
24
 
15
25
 
16
26
  def _source_paths():
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+ from enum import auto
5
+
6
+ from sqlglot.helper import AutoName
7
+
8
+
9
# ANSI escape codes for error formatting
# Emitted by highlight_sql immediately before each highlighted span (underline on).
ANSI_UNDERLINE = "\033[4m"
# Emitted by highlight_sql immediately after each highlighted span (attributes off).
ANSI_RESET = "\033[0m"
# Default for highlight_sql's context_length: how many characters of SQL are kept
# before the first highlight and after the last one.
ERROR_MESSAGE_CONTEXT_DEFAULT = 100
13
+
14
+
15
class ErrorLevel(AutoName):
    """
    Enumeration of the ways errors can be handled.

    Every member is declared with ``auto()``; the concrete member values are produced by
    ``AutoName`` (imported from ``sqlglot.helper``, not shown in this module).
    """

    IGNORE = auto()
    """Ignore all errors."""

    WARN = auto()
    """Log all errors."""

    RAISE = auto()
    """Collect all errors and raise a single exception."""

    IMMEDIATE = auto()
    """Immediately raise an exception on the first error found."""
27
+
28
+
29
class SqlglotError(Exception):
    """Common base class of the exception types defined in this module."""
31
+
32
+
33
class UnsupportedError(SqlglotError):
    """Marker subclass of SqlglotError; defines no additional state or behavior."""
35
+
36
+
37
class ParseError(SqlglotError):
    """
    SqlglotError that carries a list of structured error records.

    Attributes:
        errors: List of dictionaries describing individual errors. It is an empty list
            when no (or an empty) list was supplied to the constructor.
    """

    def __init__(
        self,
        message: str,
        errors: t.Optional[t.List[t.Dict[str, t.Any]]] = None,
    ):
        super().__init__(message)
        # A falsy argument (None or an empty list) is replaced by a fresh list.
        self.errors = errors if errors else []

    @classmethod
    def new(
        cls,
        message: str,
        description: t.Optional[str] = None,
        line: t.Optional[int] = None,
        col: t.Optional[int] = None,
        start_context: t.Optional[str] = None,
        highlight: t.Optional[str] = None,
        end_context: t.Optional[str] = None,
        into_expression: t.Optional[str] = None,
    ) -> ParseError:
        """
        Build an instance holding exactly one error record.

        Args:
            message: The exception message.
            description: Stored under the "description" key of the record.
            line: Stored under the "line" key of the record.
            col: Stored under the "col" key of the record.
            start_context: Stored under the "start_context" key of the record.
            highlight: Stored under the "highlight" key of the record.
            end_context: Stored under the "end_context" key of the record.
            into_expression: Stored under the "into_expression" key of the record.

        Returns:
            An instance of `cls` whose `errors` list contains the single record.
        """
        record = dict(
            description=description,
            line=line,
            col=col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
            into_expression=into_expression,
        )
        return cls(message, [record])
72
+
73
+
74
class TokenError(SqlglotError):
    """Marker subclass of SqlglotError; defines no additional state or behavior."""
76
+
77
+
78
class OptimizeError(SqlglotError):
    """Marker subclass of SqlglotError; defines no additional state or behavior."""
80
+
81
+
82
class SchemaError(SqlglotError):
    """Marker subclass of SqlglotError; defines no additional state or behavior."""
84
+
85
+
86
class ExecuteError(SqlglotError):
    """Marker subclass of SqlglotError; defines no additional state or behavior."""
88
+
89
+
90
def highlight_sql(
    sql: str,
    positions: t.List[t.Tuple[int, int]],
    context_length: int = ERROR_MESSAGE_CONTEXT_DEFAULT,
) -> t.Tuple[str, str, str, str]:
    """
    Underline one or more spans of a SQL string with ANSI codes.

    Args:
        sql: The complete SQL string.
        positions: (start, end) pairs of inclusive, 0-based indexes. For example,
            "foo" in "SELECT foo" is (7, 9). Pairs are processed in order of their start
            index; the part of a span already covered by an earlier one is not repeated.
        context_length: How many characters to keep before the first highlight and after
            the last highlight.

    Returns:
        A 4-tuple (formatted_sql, start_context, highlight, end_context):
            - formatted_sql: the kept text with each highlighted span wrapped in
              ANSI_UNDERLINE / ANSI_RESET
            - start_context: plain text preceding the first highlight
            - highlight: plain text from the start of the first highlight to the end of the
              last one, including any unhighlighted text in between
            - end_context: plain text following the last highlight

    Raises:
        ValueError: If positions is empty.
    """
    if not positions:
        raise ValueError("positions must contain at least one (start, end) tuple")

    ordered = sorted(positions, key=lambda pos: pos[0])

    pieces = []
    leading = ""
    trailing = ""
    span_start = 0
    # Index of the first character of `sql` that has not been emitted yet.
    cursor = 0

    earliest = ordered[0][0]
    if earliest > 0:
        span_start = earliest
        leading = sql[max(0, earliest - context_length) : earliest]
        pieces.append(leading)
        cursor = earliest

    for start, end in ordered:
        begin = max(start, cursor)
        stop = end + 1  # `end` is inclusive
        if begin < stop:
            # Plain text between the previous highlight and this one
            if cursor < begin:
                pieces.append(sql[cursor:begin])
            pieces.append(f"{ANSI_UNDERLINE}{sql[begin:stop]}{ANSI_RESET}")
            cursor = stop
        # Otherwise the span is empty or already fully covered, so it is skipped

    if cursor < len(sql):
        trailing = sql[cursor : cursor + context_length]
        pieces.append(trailing)

    return "".join(pieces), leading, sql[span_start:cursor], trailing
151
+
152
+
153
def concat_messages(errors: t.Sequence[t.Any], maximum: int) -> str:
    """
    Join the string forms of the first `maximum` errors, separated by blank lines.

    Args:
        errors: The errors to render; each one is converted with `str`.
        maximum: The largest number of errors to render. Values below zero are treated as
            zero, so that a negative limit cannot turn into an end-relative slice.

    Returns:
        The rendered messages joined by "\\n\\n". When some errors were left out, a final
        "... and N more" entry states how many.
    """
    # A negative limit would make `errors[:maximum]` drop items from the end instead of
    # limiting the count, and would inflate the "more" counter below.
    limit = max(maximum, 0)

    rendered = [str(error) for error in errors[:limit]]
    omitted = len(errors) - limit
    if omitted > 0:
        rendered.append(f"... and {omitted} more")
    return "\n\n".join(rendered)
159
+
160
+
161
def merge_errors(errors: t.Sequence[ParseError]) -> t.List[t.Dict[str, t.Any]]:
    """
    Flatten the `errors` lists of several ParseError objects into one list.

    Args:
        errors: The objects whose `errors` attribute should be combined.

    Returns:
        A new list holding every error record, in the order the objects were given.
    """
    merged: t.List[t.Dict[str, t.Any]] = []
    for error in errors:
        merged.extend(error.errors)
    return merged
@@ -5,15 +5,8 @@ import typing as t
5
5
  from collections import deque
6
6
  from copy import deepcopy
7
7
 
8
- try:
9
- from mypy_extensions import mypyc_attr
10
- except ImportError:
11
-
12
- def mypyc_attr(*attrs: str, **kwattrs: object) -> t.Callable[[t.Any], t.Any]: # type: ignore[misc]
13
- return lambda f: f
14
-
15
-
16
- from sqlglot.helper import to_bool
8
+ from sqlglot.helper import mypyc_attr, to_bool
9
+ from sqlglot.tokenizer_core import Token
17
10
 
18
11
 
19
12
  EC = t.TypeVar("EC", bound="ExpressionCore")
@@ -44,20 +37,20 @@ class ExpressionCore:
44
37
  is_func: t.ClassVar[bool] = False
45
38
  _hash_raw_args: t.ClassVar[bool] = False
46
39
 
47
- def __init__(self, **args: t.Any) -> None:
40
+ def __init__(self, **args: object) -> None:
48
41
  self.args: t.Dict[str, t.Any] = args
49
42
  self.parent: t.Optional[ExpressionCore] = None
50
43
  self.arg_key: t.Optional[str] = None
51
44
  self.index: t.Optional[int] = None
52
45
  self.comments: t.Optional[t.List[str]] = None
53
- self._type: t.Optional[t.Any] = None
46
+ self._type: t.Optional[ExpressionCore] = None
54
47
  self._meta: t.Optional[t.Dict[str, t.Any]] = None
55
48
  self._hash: t.Optional[int] = None
56
49
 
57
50
  for arg_key, value in self.args.items():
58
51
  self._set_parent(arg_key, value)
59
52
 
60
- def _set_parent(self, arg_key: str, value: t.Any, index: t.Optional[int] = None) -> None:
53
+ def _set_parent(self, arg_key: str, value: object, index: t.Optional[int] = None) -> None:
61
54
  if isinstance(value, ExpressionCore):
62
55
  value.parent = self
63
56
  value.arg_key = arg_key
@@ -137,11 +130,11 @@ class ExpressionCore:
137
130
  return self.parent.depth + 1
138
131
  return 0
139
132
 
140
- def find_ancestor(self, *expression_types: t.Any) -> t.Optional[t.Any]:
133
+ def find_ancestor(self, *expression_types: t.Type[EC]) -> t.Optional[EC]:
141
134
  ancestor = self.parent
142
135
  while ancestor and not isinstance(ancestor, expression_types):
143
136
  ancestor = ancestor.parent
144
- return ancestor
137
+ return ancestor # type: ignore[return-value]
145
138
 
146
139
  @property
147
140
  def same_parent(self) -> bool:
@@ -217,7 +210,7 @@ class ExpressionCore:
217
210
 
218
211
  def update_positions(
219
212
  self: EC,
220
- other: t.Optional[t.Any] = None,
213
+ other: t.Optional[ExpressionCore | Token] = None,
221
214
  line: t.Optional[int] = None,
222
215
  col: t.Optional[int] = None,
223
216
  start: t.Optional[int] = None,
@@ -297,14 +290,14 @@ class ExpressionCore:
297
290
  copy._hash = node._hash
298
291
 
299
292
  for k, vs in node.args.items():
300
- if hasattr(vs, "parent"):
293
+ if isinstance(vs, ExpressionCore):
301
294
  stack.append((vs, vs.__class__()))
302
295
  copy.set(k, stack[-1][-1])
303
296
  elif type(vs) is list:
304
297
  copy.args[k] = []
305
298
 
306
299
  for v in vs:
307
- if hasattr(v, "parent"):
300
+ if isinstance(v, ExpressionCore):
308
301
  stack.append((v, v.__class__()))
309
302
  copy.append(k, stack[-1][-1])
310
303
  else:
@@ -327,8 +320,7 @@ class ExpressionCore:
327
320
  if meta:
328
321
  for kv in "".join(meta).split(","):
329
322
  k, *v = kv.split("=")
330
- value: t.Any = v[0].strip() if v else True
331
- self.meta[k.strip()] = to_bool(value)
323
+ self.meta[k.strip()] = to_bool(v[0].strip() if v else True)
332
324
 
333
325
  if not prepend:
334
326
  self.comments.append(comment)
@@ -339,7 +331,7 @@ class ExpressionCore:
339
331
  def set(
340
332
  self,
341
333
  arg_key: str,
342
- value: t.Any,
334
+ value: object,
343
335
  index: t.Optional[int] = None,
344
336
  overwrite: bool = True,
345
337
  ) -> None:
@@ -380,10 +372,10 @@ class ExpressionCore:
380
372
  self.args[arg_key] = value
381
373
  self._set_parent(arg_key, value, index)
382
374
 
383
- def find(self, *expression_types: t.Any, bfs: bool = True) -> t.Optional[t.Any]:
375
+ def find(self, *expression_types: t.Type[EC], bfs: bool = True) -> t.Optional[EC]:
384
376
  return next(self.find_all(*expression_types, bfs=bfs), None)
385
377
 
386
- def find_all(self, *expression_types: t.Any, bfs: bool = True) -> t.Iterator[t.Any]:
378
+ def find_all(self, *expression_types: t.Type[EC], bfs: bool = True) -> t.Iterator[EC]:
387
379
  for expression in self.walk(bfs=bfs):
388
380
  if isinstance(expression, expression_types):
389
381
  yield expression
@@ -427,14 +419,16 @@ class ExpressionCore:
427
419
  self.replace(None)
428
420
  return self
429
421
 
430
- def assert_is(self, type_: t.Any) -> t.Any:
422
+ def assert_is(self, type_: t.Type[EC]) -> EC:
431
423
  if not isinstance(self, type_):
432
424
  raise AssertionError(f"{self} is not {type_}.")
433
425
  return self
434
426
 
435
- def transform(self, fun: t.Callable, *args: t.Any, copy: bool = True, **kwargs: t.Any) -> t.Any:
436
- root: t.Optional[t.Any] = None
437
- new_node: t.Optional[t.Any] = None
427
+ def transform(
428
+ self, fun: t.Callable, *args: object, copy: bool = True, **kwargs: object
429
+ ) -> t.Any:
430
+ root: t.Any = None
431
+ new_node: t.Any = None
438
432
 
439
433
  for node in (self.copy() if copy else self).dfs(prune=lambda n: n is not new_node):
440
434
  parent, arg_key, index = node.parent, node.arg_key, node.index
@@ -12,6 +12,17 @@ from difflib import get_close_matches
12
12
  from enum import Enum
13
13
  from itertools import count
14
14
 
15
+ try:
16
+ from mypy_extensions import mypyc_attr, trait
17
+ except ImportError:
18
+
19
+ def mypyc_attr(*attrs: str, **kwattrs: object) -> t.Callable[[t.Any], t.Any]: # type: ignore[misc]
20
+ return lambda f: f
21
+
22
+ def trait(f: t.Any) -> t.Any: # type: ignore[misc]
23
+ return f
24
+
25
+
15
26
  T = t.TypeVar("T")
16
27
  E = t.TypeVar("E")
17
28
 
@@ -0,0 +1,190 @@
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ from sqlglot.errors import ErrorLevel, ParseError, highlight_sql
6
+ from sqlglot.tokenizer_core import Token, TokenType
7
+
8
+
9
class ParserCore:
    """
    Token cursor plus the matching and error-reporting primitives built on top of it.

    The instance tracks a position (`_index`) inside the current token list (`_tokens`) and
    caches the token at that position (`_curr`), the one after it (`_next`) and the one
    before it (`_prev`, together with its comments in `_prev_comments`).

    Attributes:
        error_level: Controls error handling: IMMEDIATE makes `raise_error` raise right
            away, IGNORE makes `validate_expression` skip validation.
        error_message_context: Passed to `highlight_sql` as the context length.
        max_errors: Stored as given; not read by this class.
        dialect: Stored as given; not read by this class.
        sql: The SQL text used for error messages and `_find_sql`.
        errors: ParseError objects collected by `raise_error`.
    """

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
        "_chunks",
        "_chunk_index",
    )

    def __init__(
        self,
        error_level: ErrorLevel,
        error_message_context: int,
        max_errors: int,
        dialect: t.Any,
    ) -> None:
        self.error_level: ErrorLevel = error_level
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect: t.Any = dialect
        self.reset()

    def reset(self) -> None:
        """Put every piece of per-parse state back to its empty initial value."""
        # Input text and collected errors
        self.sql: str = ""
        self.errors: t.List[ParseError] = []

        # Token cursor
        self._tokens: t.List[Token] = []
        self._index: int = 0
        self._curr: t.Optional[Token] = None
        self._next: t.Optional[Token] = None
        self._prev: t.Optional[Token] = None
        self._prev_comments: t.Optional[t.List[str]] = None

        # Counters and chunk bookkeeping
        self._pipe_cte_counter: int = 0
        self._chunks: t.List[t.List[Token]] = []
        self._chunk_index: int = 0

    def _advance(self, times: int = 1) -> None:
        """Move the cursor by `times` positions (may be negative) and refresh the cached tokens."""
        position = self._index + times
        self._index = position

        stream = self._tokens
        total = len(stream)
        lookahead = position + 1

        self._curr = stream[position] if position < total else None
        self._next = stream[lookahead] if lookahead < total else None

        if position <= 0:
            # Nothing precedes the cursor
            self._prev = None
            self._prev_comments = None
            return

        previous = stream[position - 1]
        self._prev = previous
        self._prev_comments = previous.comments

    def _advance_chunk(self) -> None:
        """Switch to the chunk at `_chunk_index` and place the cursor on its first token."""
        # Start one step before the beginning so that the `_advance` below lands on index 0.
        self._index = -1
        self._tokens = self._chunks[self._chunk_index]
        self._chunk_index += 1
        self._advance()

    def _retreat(self, index: int) -> None:
        """Move the cursor to the absolute position `index`; a no-op if it is already there."""
        offset = index - self._index
        if offset:
            self._advance(offset)

    def _add_comments(self, expression: t.Any) -> None:
        """Hand the previous token's comments to `expression` and clear them, if both are truthy."""
        pending = self._prev_comments
        if expression and pending:
            expression.add_comments(pending)
            self._prev_comments = None

    def _match(self, token_type: TokenType, advance: bool = True, expression: t.Any = None) -> bool:
        """
        Check whether the current token has type `token_type`.

        On a match with `advance` set, the cursor moves one token forward and the comments
        of the matched token are attached to `expression` via `_add_comments`.
        """
        current = self._curr
        if not (current and current.token_type == token_type):
            return False

        if advance:
            self._advance()
            self._add_comments(expression)
        return True

    def _match_set(self, types: t.Any, advance: bool = True) -> bool:
        """Check whether the current token's type is contained in `types`, optionally consuming it."""
        current = self._curr
        if not (current and current.token_type in types):
            return False

        if advance:
            self._advance()
        return True

    def _match_pair(
        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
    ) -> bool:
        """Check whether the current and next tokens have the two given types, optionally consuming both."""
        first = self._curr
        second = self._next
        if not (
            first
            and second
            and first.token_type == token_type_a
            and second.token_type == token_type_b
        ):
            return False

        if advance:
            self._advance(2)
        return True

    def _match_texts(self, texts: t.Any, advance: bool = True) -> bool:
        """Check whether the current non-string token's upper-cased text is in `texts`, optionally consuming it."""
        current = self._curr
        if not (
            current and current.token_type != TokenType.STRING and current.text.upper() in texts
        ):
            return False

        if advance:
            self._advance()
        return True

    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
        """
        Check whether the upcoming non-string tokens spell out `texts`, compared upper-cased.

        The cursor is restored to where it started when the sequence does not match, or
        when it matches but `advance` is false.
        """
        checkpoint = self._index
        string_type = TokenType.STRING

        for expected in texts:
            current = self._curr
            if not (
                current and current.token_type != string_type and current.text.upper() == expected
            ):
                self._retreat(checkpoint)
                return False
            self._advance()

        if not advance:
            self._retreat(checkpoint)

        return True

    def _is_connected(self) -> bool:
        """Tell whether the current token starts right after the previous one ends."""
        before = self._prev
        after = self._curr
        if not before or not after:
            return False
        return bool(before.end + 1 == after.start)

    def _find_sql(self, start: Token, end: Token) -> str:
        """Return the slice of `sql` from the start of `start` through the end of `end`."""
        first_char = start.start
        last_char = end.end
        return self.sql[first_char : last_char + 1]

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Record a ParseError, or raise it when the error level is IMMEDIATE.

        Args:
            message: The error description.
            token: The token to point at. Falls back to the current token, then the previous
                token, then an empty string token.
        """
        target = token or self._curr or self._prev or Token.string("")
        line = target.line
        col = target.col

        formatted_sql, start_context, highlight, end_context = highlight_sql(
            sql=self.sql,
            positions=[(target.start, target.end)],
            context_length=self.error_message_context,
        )

        error = ParseError.new(
            f"{message}. Line {line}, Col: {col}.\n {formatted_sql}",
            description=message,
            line=line,
            col=col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def validate_expression(self, expression: t.Any, args: t.Optional[t.List] = None) -> t.Any:
        """
        Report every message from `expression.error_messages(args)` through `raise_error`.

        Nothing is checked when the error level is IGNORE. The expression is returned as is.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for problem in expression.error_messages(args):
                self.raise_error(problem)
        return expression

    def _try_parse(self, parse_method: t.Callable, retreat: bool = False) -> t.Optional[t.Any]:
        """
        Call `parse_method` with the error level temporarily set to IMMEDIATE.

        A ParseError raised by the call is swallowed and yields None. The cursor is put back
        where it started when the result is falsy or `retreat` is set, and the original
        error level is restored in every case.
        """
        checkpoint = self._index
        saved_level = self.error_level
        result: t.Optional[t.Any] = None

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            result = parse_method()
        except ParseError:
            result = None
        finally:
            if retreat or not result:
                self._retreat(checkpoint)
            self.error_level = saved_level

        return result