sqlym 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ """SQLコメント内パラメータの字句解析."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+
8
# Parameter marker pattern.
#   /* $name */'default' : removable parameter (leading "$")
#   /* name */'default'  : non-removable parameter
#
# Groups:
#   1 -> "$" when the parameter is removable, otherwise None
#   2 -> parameter name
#   3 -> the default literal that follows the comment, or None when absent
#
# The numeric alternative accepts an optional sign so that a default such as
# ``-1`` is consumed together with the marker instead of being left behind in
# the rewritten SQL. A bare ``NULL`` default is matched by the identifier
# alternative, so it needs no dedicated branch.
PARAM_PATTERN = re.compile(
    r"/\*\s*(\$)?(\w+)\s*\*/\s*"
    r"("
    r"'(?:''|[^'])*'"  # 'string' (SQL escape: '')
    r'|"(?:""|[^"])*"'  # "string" (SQL escape: "")
    r"|[+-]?\d+(?:\.\d+)?"  # number, optionally signed
    r"|\w+"  # identifier / keyword (covers NULL)
    r"|\([^)]*\)"  # (list)
    r")?"
)
22
+
23
# IN-clause pattern: the keyword IN (any letter case) followed by a parameter
# comment and a parenthesised default list.
#   group 1 -> "$" when the parameter is removable, otherwise None
#   group 2 -> parameter name
IN_PATTERN = re.compile(r"\bIN\s*/\*\s*(\$)?(\w+)\s*\*/\s*\([^)]*\)", flags=re.I)
28
+
29
+
30
@dataclass(frozen=True)
class Token:
    """Immutable description of one parameter marker found in a SQL line."""

    # Parameter name as written inside the comment.
    name: str

    # True when the marker carried a "$" prefix (a None value removes the line).
    removable: bool

    # Text of the default value that follows the marker.
    default: str

    # True when the marker belongs to an IN clause.
    is_in_clause: bool

    # Start offset of the marker within the source string.
    start: int

    # End offset of the marker within the source string.
    end: int
51
+
52
+
53
def tokenize(line: str) -> list[Token]:
    """Extract the parameter tokens contained in one SQL line.

    IN-clause markers are collected first; ordinary parameter markers are
    then collected only where they do not overlap an IN-clause match.

    Args:
        line: A single line of SQL text.

    Returns:
        The tokens ordered by their start offset.

    """
    in_matches = list(IN_PATTERN.finditer(line))
    # Spans already claimed by IN clauses; ordinary markers inside are skipped.
    claimed: list[tuple[int, int]] = [(m.start(), m.end()) for m in in_matches]

    found: list[Token] = [
        Token(
            name=m.group(2),
            removable=m.group(1) is not None,
            default=_extract_in_default(m.group(0)),
            is_in_clause=True,
            start=m.start(),
            end=m.end(),
        )
        for m in in_matches
    ]

    for m in PARAM_PATTERN.finditer(line):
        begin, stop = m.start(), m.end()
        if _overlaps(begin, stop, claimed):
            continue
        found.append(
            Token(
                name=m.group(2),
                removable=m.group(1) is not None,
                default=m.group(3) or "",
                is_in_clause=False,
                start=begin,
                end=stop,
            )
        )

    # Stable sort, so tokens sharing a start offset keep their collection order.
    return sorted(found, key=lambda t: t.start)
105
+
106
+
107
+ def _overlaps(start: int, end: int, ranges: list[tuple[int, int]]) -> bool:
108
+ """指定範囲が既存範囲と重複するか判定する."""
109
+ return any(start < r_end and end > r_start for r_start, r_end in ranges)
110
+
111
+
112
+ def _extract_in_default(matched: str) -> str:
113
+ """IN句マッチ文字列からデフォルトリスト部分を抽出する."""
114
+ paren_start = matched.rfind("(")
115
+ if paren_start >= 0:
116
+ return matched[paren_start:]
117
+ return ""
sqlym/parser/twoway.py ADDED
@@ -0,0 +1,516 @@
1
+ """Clione-SQL風 2way SQLパーサー."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass, field
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ from sqlym import config
10
+ from sqlym.exceptions import SqlParseError
11
+ from sqlym.parser.line_unit import LineUnit
12
+ from sqlym.parser.tokenizer import tokenize
13
+
14
+ if TYPE_CHECKING:
15
+ from sqlym.dialect import Dialect
16
+
17
+
18
@dataclass
class ParsedSQL:
    """Result of parsing a SQL template."""

    # The rebuilt SQL text.
    sql: str

    # Bind values for positional ("?"-style) placeholders.
    params: list[Any] = field(default_factory=list)

    # Bind values for ":name"-style placeholders.
    named_params: dict[str, Any] = field(default_factory=dict)
28
+
29
+
30
class TwoWaySQLParser:
    """Clione-SQL style two-way SQL parser.

    A template is processed line by line: lines are arranged into a tree by
    indentation, lines whose removable (``$``-prefixed) parameters are missing
    or None are dropped, removal is propagated to parents, and the remaining
    parameter markers are replaced by bind placeholders.
    """

    def __init__(
        self,
        sql: str,
        placeholder: str = "?",
        *,
        dialect: Dialect | None = None,
    ) -> None:
        """Initialise the parser.

        Args:
            sql: SQL template.
            placeholder: Placeholder style ("?", "%s" or ":name").
            dialect: RDBMS dialect. When given, ``dialect.placeholder`` is
                used as the placeholder style.

        Raises:
            ValueError: If ``dialect`` is given together with a non-default
                ``placeholder``.

        """
        if dialect is not None and placeholder != "?":
            msg = "dialect と placeholder は同時に指定できません"
            raise ValueError(msg)
        self.original_sql = sql
        self.dialect = dialect
        self.placeholder = dialect.placeholder if dialect is not None else placeholder

    def parse(self, params: dict[str, Any]) -> ParsedSQL:
        """Parse the template and bind ``params``.

        Args:
            params: Parameter values keyed by parameter name.

        Returns:
            The rebuilt SQL with its bind values. With the ":name" style the
            expanded named values are returned and ``params`` is empty;
            otherwise the positional values are returned together with the
            caller's ``params`` mapping as ``named_params``.

        """
        units = self._parse_lines()
        self._build_tree(units)
        self._evaluate_params(units, params)
        self._propagate_removal(units)
        sql, bind_params, named_bind_params = self._rebuild_sql(units, params)
        sql = self._clean_sql(sql)
        if self.placeholder == ":name":
            return ParsedSQL(
                sql=sql,
                params=[],
                named_params=named_bind_params,
            )
        return ParsedSQL(
            sql=sql,
            params=bind_params,
            named_params=params,
        )

    def _parse_lines(self) -> list[LineUnit]:
        """Split the template into one LineUnit per line (Rule 1).

        Blank or whitespace-only lines get an indent of -1.
        """
        units: list[LineUnit] = []
        for i, line in enumerate(self.original_sql.splitlines()):
            stripped = line.lstrip()
            indent = len(line) - len(stripped) if stripped else -1
            units.append(
                LineUnit(
                    line_number=i + 1,
                    original=line,
                    indent=indent,
                    content=stripped,
                )
            )
        return units

    def _build_tree(self, units: list[LineUnit]) -> None:
        """Link parents and children according to indentation (Rule 2)."""
        stack: list[LineUnit] = []
        for unit in units:
            if unit.is_empty:
                continue
            # Discard entries indented as deep as, or deeper than, this line.
            while stack and stack[-1].indent >= unit.indent:
                stack.pop()
            # Whatever remains on top is the nearest shallower line: the parent.
            if stack:
                unit.parent = stack[-1]
                stack[-1].children.append(unit)
            stack.append(unit)

    def _evaluate_params(self, units: list[LineUnit], params: dict[str, Any]) -> None:
        """Mark lines for removal based on parameter values (Rule 4).

        A line is marked removed when any of its removable (``$``) parameters
        is None or absent from ``params``. Non-removable parameters never
        remove a line; a None value is bound as NULL instead.
        """
        for unit in units:
            if unit.is_empty or unit.removed:
                continue
            for token in tokenize(unit.content):
                if token.removable and params.get(token.name) is None:
                    unit.removed = True
                    break

    def _propagate_removal(self, units: list[LineUnit]) -> None:
        """Remove parents whose children are all removed (bottom-up, Rule 3).

        Units are walked in reverse so removal flows grandchild -> child ->
        parent. A childless, parameterless line (for example a closing
        parenthesis) is also removed once every sibling has been removed.
        The pass repeats until nothing changes.
        """
        changed = True
        while changed:
            changed = False
            for unit in reversed(units):
                if unit.is_empty or unit.removed:
                    continue
                if not unit.children:
                    # Leaf without parameters: drop it when all siblings are gone.
                    if unit.parent and not tokenize(unit.content):
                        others = [s for s in unit.parent.children if s is not unit]
                        if others and all(s.removed for s in others):
                            unit.removed = True
                            changed = True
                    continue
                if all(child.removed for child in unit.children):
                    unit.removed = True
                    changed = True

    def _rebuild_sql(
        self, units: list[LineUnit], params: dict[str, Any]
    ) -> tuple[str, list[Any], dict[str, Any]]:
        """Rebuild the SQL text from the lines that were not removed.

        Args:
            units: Line units with their ``removed`` flags already decided.
            params: Parameter values keyed by parameter name.

        Returns:
            A tuple of (SQL text, positional bind values, named bind values).

        Raises:
            SqlParseError: If an IN list exceeds the dialect limit and the
                column expression in front of the IN keyword cannot be found.

        """
        result_lines: list[str] = []
        bind_params: list[Any] = []
        named_bind_params: dict[str, Any] = {}
        is_named = self.placeholder == ":name"
        in_limit = self.dialect.in_clause_limit if self.dialect else None

        for unit in units:
            if unit.removed:
                continue
            if unit.is_empty:
                result_lines.append(unit.original)
                continue

            line = unit.content
            tokens = tokenize(line)
            if not tokens:
                # No parameters: emit the line untouched, indentation included.
                result_lines.append(unit.original)
                continue

            # Replace from the last token backwards so earlier offsets stay valid.
            line_params: list[Any] = []
            for token in reversed(tokens):
                value = params.get(token.name)
                single = f":{token.name}" if is_named else self.placeholder

                if not token.is_in_clause:
                    line = line[: token.start] + single + line[token.end :]
                    if is_named:
                        named_bind_params[token.name] = value
                    else:
                        line_params.insert(0, value)
                    continue

                if not isinstance(value, list):
                    # A non-list value (None etc.) becomes a one-element IN list.
                    line = line[: token.start] + f"IN ({single})" + line[token.end :]
                    if is_named:
                        named_bind_params[token.name] = value
                    else:
                        line_params.insert(0, value)
                    continue

                if in_limit and len(value) > in_limit:
                    # Over the IN limit: split into several IN lists on the column.
                    extracted = self._extract_in_clause_column(line, token.start)
                    negated = False
                    if extracted is not None and extracted[0].upper() == "NOT":
                        # "col NOT IN (...)": the word before IN is the NOT
                        # keyword, so the column is whatever precedes it.
                        negated = True
                        extracted = self._extract_in_clause_column(line, extracted[1])
                    if extracted is None:
                        msg = self._format_error(
                            "in_clause_column_unresolved",
                            line_number=unit.line_number,
                            sql_line=line,
                        )
                        raise SqlParseError(msg)
                    col_expr, col_start = extracted
                    if is_named:
                        replacement, expanded_named = self._expand_in_clause_split_named(
                            token.name,
                            value,
                            in_limit,
                            col_expr,
                            negated=negated,
                        )
                        named_bind_params.update(expanded_named)
                    else:
                        replacement, expanded = self._expand_in_clause_split(
                            value,
                            in_limit,
                            col_expr,
                            negated=negated,
                        )
                        line_params[:0] = expanded
                    # The replacement restates the column, so cut from its start.
                    line = line[:col_start] + replacement + line[token.end :]
                elif is_named:
                    replacement, expanded_named = self._expand_in_clause_named(token.name, value)
                    line = line[: token.start] + replacement + line[token.end :]
                    named_bind_params.update(expanded_named)
                else:
                    replacement, expanded = self._expand_in_clause(value)
                    line = line[: token.start] + replacement + line[token.end :]
                    line_params[:0] = expanded
            bind_params.extend(line_params)

            # Reuse the original leading whitespace so tab indentation survives.
            result_lines.append(unit.original[: unit.indent] + line)

        return "\n".join(result_lines), bind_params, named_bind_params

    def _expand_in_clause(self, values: list[Any]) -> tuple[str, list[Any]]:
        """Expand an IN list into positional placeholders.

        Args:
            values: Values to bind.

        Returns:
            A tuple of (replacement text, bind values). An empty list yields
            ``IN (NULL)`` with no bind values.

        """
        if not values:
            return "IN (NULL)", []
        placeholders = ", ".join([self.placeholder] * len(values))
        return f"IN ({placeholders})", list(values)

    def _expand_in_clause_named(self, name: str, values: list[Any]) -> tuple[str, dict[str, Any]]:
        """Expand an IN list into named placeholders ``:name_0, :name_1, ...``.

        Args:
            name: Parameter name used as the key prefix.
            values: Values to bind.

        Returns:
            A tuple of (replacement text, named bind values). An empty list
            yields ``IN (NULL)`` with no bind values.

        """
        if not values:
            return "IN (NULL)", {}
        named = {f"{name}_{i}": v for i, v in enumerate(values)}
        placeholders = ", ".join(f":{k}" for k in named)
        return f"IN ({placeholders})", named

    def _expand_in_clause_split(
        self,
        values: list[Any],
        limit: int,
        col_expr: str,
        *,
        negated: bool = False,
    ) -> tuple[str, list[Any]]:
        """Split an IN list into chunks of at most ``limit`` values.

        Args:
            values: Values to bind.
            limit: Maximum number of values per IN list.
            col_expr: Column expression (e.g. "dept_id", "e.id").
            negated: When True, emit ``NOT IN`` chunks joined with AND
                instead of ``IN`` chunks joined with OR.

        Returns:
            A tuple of (replacement text, bind values).

        """
        operator, joiner = ("NOT IN", " AND ") if negated else ("IN", " OR ")
        parts: list[str] = []
        for offset in range(0, len(values), limit):
            chunk = values[offset : offset + limit]
            phs = ", ".join([self.placeholder] * len(chunk))
            parts.append(f"{col_expr} {operator} ({phs})")
        return "(" + joiner.join(parts) + ")", list(values)

    def _expand_in_clause_split_named(
        self,
        name: str,
        values: list[Any],
        limit: int,
        col_expr: str,
        *,
        negated: bool = False,
    ) -> tuple[str, dict[str, Any]]:
        """Split an IN list into chunks using named placeholders.

        Args:
            name: Parameter name used as the key prefix.
            values: Values to bind.
            limit: Maximum number of values per IN list.
            col_expr: Column expression (e.g. "dept_id", "e.id").
            negated: When True, emit ``NOT IN`` chunks joined with AND
                instead of ``IN`` chunks joined with OR.

        Returns:
            A tuple of (replacement text, named bind values).

        """
        operator, joiner = ("NOT IN", " AND ") if negated else ("IN", " OR ")
        parts: list[str] = []
        named: dict[str, Any] = {}
        idx = 0  # keys are numbered across chunks, not per chunk
        for offset in range(0, len(values), limit):
            chunk_keys: list[str] = []
            for v in values[offset : offset + limit]:
                key = f"{name}_{idx}"
                named[key] = v
                chunk_keys.append(f":{key}")
                idx += 1
            parts.append(f"{col_expr} {operator} ({', '.join(chunk_keys)})")
        return "(" + joiner.join(parts) + ")", named

    def _clean_sql(self, sql: str) -> str:
        """Strip leftover WHERE/HAVING, leading AND/OR and unmatched ')' lines.

        Args:
            sql: SQL text produced after line removal.

        Returns:
            The cleaned SQL text.

        """
        lines = sql.split("\n")

        # 1. Drop lines consisting only of ")" that have no opening line.
        paren_stack: list[int] = []
        remove_indices: set[int] = set()
        for i, line in enumerate(lines):
            stripped = line.strip()
            if stripped == ")":
                if paren_stack:
                    paren_stack.pop()
                else:
                    remove_indices.add(i)
            elif stripped.endswith("("):
                if stripped.count("(") > stripped.count(")"):
                    paren_stack.append(i)
        lines = [line for i, line in enumerate(lines) if i not in remove_indices]
        sql = "\n".join(lines)

        # 2. Drop a leading AND/OR on the first condition after WHERE/HAVING.
        #    The indentation group may be empty so an unindented AND/OR is
        #    handled as well.
        sql = re.sub(
            r"(\b(?:WHERE|HAVING)\b[ \t]*\n(?:[ \t]*\n)*)([ \t]*)(?:AND|OR)\b[ \t]+",
            r"\1\2",
            sql,
            flags=re.IGNORECASE,
        )

        # 3. Drop a WHERE/HAVING left without conditions at the end of the SQL.
        sql = re.sub(
            r"\n?[ \t]*\b(?:WHERE|HAVING)\b[ \t]*(?:\n[ \t]*)*$",
            "",
            sql,
            flags=re.IGNORECASE,
        )

        # 4. Drop a WHERE/HAVING without conditions that another clause follows.
        next_clause = r"ORDER|GROUP|LIMIT|UNION|EXCEPT|INTERSECT|FETCH|OFFSET|FOR"
        sql = re.sub(
            rf"[ \t]*\b(?:WHERE|HAVING)\b[ \t]*\n(?=[ \t]*\b(?:{next_clause})\b)",
            "",
            sql,
            flags=re.IGNORECASE,
        )

        return sql

    @staticmethod
    def _format_error(key: str, *, line_number: int, sql_line: str) -> str:
        """Build an error message in the configured language.

        Args:
            key: Message key; used verbatim when no message is registered.
            line_number: Line number to report.
            sql_line: SQL line appended when ``config.ERROR_INCLUDE_SQL`` is set.

        Returns:
            The formatted message. Unknown languages fall back to Japanese.

        """
        messages = {
            "ja": {
                "in_clause_column_unresolved": "IN句分割の列式を抽出できません",
            },
            "en": {
                "in_clause_column_unresolved": (
                    "Failed to extract column expression for IN clause split"
                ),
            },
        }
        lang = config.ERROR_MESSAGE_LANGUAGE
        base = messages.get(lang, messages["ja"]).get(key, key)
        msg = f"{base}: line={line_number}"
        if config.ERROR_INCLUDE_SQL:
            msg = f"{msg} sql='{sql_line.strip()}'"
        return msg

    @staticmethod
    def _extract_in_clause_column(line: str, token_start: int) -> tuple[str, int] | None:
        """Extract the expression that ends just before ``token_start``.

        Handles a trailing identifier, quoted identifier, function call or
        parenthesised expression.

        Args:
            line: SQL line being rewritten.
            token_start: Offset at which the IN token starts.

        Returns:
            A tuple of (expression text, start offset), or None when no
            expression can be extracted.

        """
        prefix = line[:token_start].rstrip()
        if not prefix:
            return None
        end = len(prefix) - 1

        if prefix[end] == ")":
            open_idx = TwoWaySQLParser._find_matching_open_paren(prefix, end)
            if open_idx is None:
                return None
            expr_start = open_idx
            # A name directly before "(" makes this a function call.
            func_start = TwoWaySQLParser._parse_identifier_chain(prefix, open_idx - 1)
            if func_start is not None:
                expr_start = func_start
            return prefix[expr_start : end + 1].strip(), expr_start

        ident_start = TwoWaySQLParser._parse_identifier_chain(prefix, end)
        if ident_start is None:
            return None
        return prefix[ident_start : end + 1].strip(), ident_start

    @staticmethod
    def _parse_identifier_chain(s: str, end: int) -> int | None:
        """Return the start of the dotted identifier chain ending at ``end``.

        Whitespace at ``end`` is skipped first. Returns None when no
        identifier ends there.
        """
        i = end
        while i >= 0 and s[i].isspace():
            i -= 1
        if i < 0:
            return None

        start = TwoWaySQLParser._parse_identifier_segment(s, i)
        if start is None:
            return None
        i = start - 1

        # Extend leftwards across "segment." prefixes.
        while i >= 0:
            if s[i] != ".":
                return start
            i -= 1
            seg_start = TwoWaySQLParser._parse_identifier_segment(s, i)
            if seg_start is None:
                return start
            start = seg_start
            i = start - 1

        return start

    @staticmethod
    def _parse_identifier_segment(s: str, end: int) -> int | None:
        """Return the start of the identifier segment ending at ``end``.

        A segment is either a double-quoted identifier (with ``""`` as the
        escaped quote) or a run of identifier characters beginning with a
        letter or underscore. Returns None otherwise.
        """
        if end < 0:
            return None
        if s[end] == '"':
            i = end - 1
            while i >= 0:
                if s[i] == '"':
                    if i - 1 >= 0 and s[i - 1] == '"':
                        # Doubled quote: an escaped quote inside the identifier.
                        i -= 2
                        continue
                    return i
                i -= 1
            return None
        if not TwoWaySQLParser._is_ident_char(s[end]):
            return None
        i = end
        while i >= 0 and TwoWaySQLParser._is_ident_char(s[i]):
            i -= 1
        start = i + 1
        if not s[start].isalpha() and s[start] != "_":
            return None
        return start

    @staticmethod
    def _is_ident_char(ch: str) -> bool:
        """Return True for an alphanumeric character, "_" or "$"."""
        return ch.isalnum() or ch in {"_", "$"}

    @staticmethod
    def _find_matching_open_paren(s: str, close_idx: int) -> int | None:
        """Return the index of the "(" matching the ")" at ``close_idx``.

        Scans backwards with simple balancing; parentheses inside single- or
        double-quoted text are ignored. Returns None when no match is found.
        """
        depth = 0
        in_single = False
        in_double = False
        i = close_idx
        while i >= 0:
            ch = s[i]
            if ch == "'" and not in_double:
                if i > 0 and s[i - 1] == "'":
                    # Doubled quote: skip the pair without toggling state.
                    i -= 2
                    continue
                in_single = not in_single
                i -= 1
                continue
            if ch == '"' and not in_single:
                if i > 0 and s[i - 1] == '"':
                    i -= 2
                    continue
                in_double = not in_double
                i -= 1
                continue
            if in_single or in_double:
                i -= 1
                continue
            if ch == ")":
                depth += 1
            elif ch == "(":
                depth -= 1
                if depth == 0:
                    return i
            i -= 1
        return None