sqlh 0.2.8__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlh-0.2.8 → sqlh-0.3.2}/PKG-INFO +1 -3
- {sqlh-0.2.8 → sqlh-0.3.2}/pyproject.toml +2 -4
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/__init__.py +4 -2
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/cli.py +36 -1
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/core/helper.py +219 -6
- sqlh-0.3.2/sqlh/tests/test_sqlhelper.py +77 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/tests/test_utils.py +2 -2
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/utils.py +5 -1
- sqlh-0.2.8/sqlh/tests/test_sqlhelper.py +0 -36
- {sqlh-0.2.8 → sqlh-0.3.2}/README.md +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/.DS_Store +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/core/graph.py +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/core/keywords.py +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/static/dagre_template.html +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/static/mermaid_template.html +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/tests/test_cli.py +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/tests/test_graph.py +0 -0
- {sqlh-0.2.8 → sqlh-0.3.2}/sqlh/tests/test_import.py +0 -0
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sqlh
|
|
3
|
-
Version: 0.2
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
|
|
5
5
|
Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
|
|
6
|
-
Maintainer: Perry DU
|
|
7
|
-
Maintainer-email: Perry DU <duneite@gmail.com>
|
|
8
6
|
Requires-Python: >=3.10
|
|
9
7
|
Description-Content-Type: text/markdown
|
|
10
8
|
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sqlh"
|
|
3
|
-
version = "0.2
|
|
4
|
-
|
|
5
|
-
{name = "Perry DU", email = "duneite@gmail.com"}
|
|
6
|
-
]
|
|
3
|
+
version = "0.3.2"
|
|
4
|
+
|
|
7
5
|
description = "A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines"
|
|
8
6
|
readme = "README.md"
|
|
9
7
|
requires-python = ">=3.10"
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .core.graph import DagGraph
|
|
2
|
-
from .core.helper import split_sql, trim_comment
|
|
2
|
+
from .core.helper import split_sql, trim_comment, split_sql_v2, split_sql_v3
|
|
3
3
|
from .utils import (
|
|
4
4
|
get_all_leaf_tables,
|
|
5
5
|
get_all_root_tables,
|
|
@@ -14,10 +14,12 @@ from .utils import (
|
|
|
14
14
|
table_count
|
|
15
15
|
)
|
|
16
16
|
|
|
17
|
-
__version__ = "0.2
|
|
17
|
+
__version__ = "0.3.2"
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
20
|
"split_sql",
|
|
21
|
+
"split_sql_v2",
|
|
22
|
+
"split_sql_v3",
|
|
21
23
|
"trim_comment",
|
|
22
24
|
"DagGraph",
|
|
23
25
|
"read_sql_from_directory",
|
|
@@ -5,6 +5,7 @@ from pathlib import Path
|
|
|
5
5
|
|
|
6
6
|
from sqlh import __version__
|
|
7
7
|
|
|
8
|
+
from .core.helper import split_sql, split_sql_v2, split_sql_v3
|
|
8
9
|
from .utils import (
|
|
9
10
|
get_all_dag,
|
|
10
11
|
get_all_leaf_tables,
|
|
@@ -46,7 +47,7 @@ def _create_parent_parser():
|
|
|
46
47
|
|
|
47
48
|
|
|
48
49
|
def arg_parse():
|
|
49
|
-
parser = argparse.ArgumentParser(usage="%(prog)s [OPTIONS]
|
|
50
|
+
parser = argparse.ArgumentParser(usage="%(prog)s <COMMAND> [OPTIONS] ")
|
|
50
51
|
parser.add_argument("-v", "--version", action="version", version=__version__)
|
|
51
52
|
|
|
52
53
|
# 获取共享参数的父解析器
|
|
@@ -103,6 +104,24 @@ def arg_parse():
|
|
|
103
104
|
)
|
|
104
105
|
table_count_parser.add_argument("-t", "--table", help="table name to search")
|
|
105
106
|
table_count_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
|
|
107
|
+
|
|
108
|
+
# split 子命令
|
|
109
|
+
split_parser = subparsers.add_parser(
|
|
110
|
+
"split",
|
|
111
|
+
parents=[parent_parser],
|
|
112
|
+
help="split sql file",
|
|
113
|
+
add_help=False,
|
|
114
|
+
)
|
|
115
|
+
split_parser.add_argument(
|
|
116
|
+
"-sv",
|
|
117
|
+
"--split-version",
|
|
118
|
+
type=int,
|
|
119
|
+
choices=[1, 2, 3],
|
|
120
|
+
default=1,
|
|
121
|
+
help="split version",
|
|
122
|
+
)
|
|
123
|
+
split_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
|
|
124
|
+
|
|
106
125
|
return parser.parse_args()
|
|
107
126
|
|
|
108
127
|
|
|
@@ -181,3 +200,19 @@ def main():
|
|
|
181
200
|
else:
|
|
182
201
|
print(f"Error: Not Supported output format: {args.output_format}")
|
|
183
202
|
sys.exit(1)
|
|
203
|
+
|
|
204
|
+
elif args.command == "split":
|
|
205
|
+
import time
|
|
206
|
+
|
|
207
|
+
t = time.perf_counter()
|
|
208
|
+
if args.split_version == 1:
|
|
209
|
+
split_sql(sql_stmt_str)
|
|
210
|
+
elif args.split_version == 2:
|
|
211
|
+
split_sql_v2(sql_stmt_str)
|
|
212
|
+
elif args.split_version == 3:
|
|
213
|
+
split_sql_v3(sql_stmt_str)
|
|
214
|
+
else:
|
|
215
|
+
print(f"Error: Not Supported split version: {args.split_version}")
|
|
216
|
+
sys.exit(1)
|
|
217
|
+
t_parse = time.perf_counter() - t
|
|
218
|
+
print(f"parse time: {t_parse * 1000:.3f} ms")
|
|
@@ -9,13 +9,28 @@ This module provides SQL parsing functionality using token-based analysis:
|
|
|
9
9
|
|
|
10
10
|
The parser uses keyword-based tokenization rather than full AST parsing,
|
|
11
11
|
making it lightweight and fast for simple table/field extraction tasks.
|
|
12
|
+
|
|
13
|
+
=== 性能统计 ===
|
|
14
|
+
文件数量 : 489 个
|
|
15
|
+
总字符数 : 2264584 字节 (2.16 MB)
|
|
16
|
+
SQL 语句数 : 1174 条
|
|
17
|
+
读取耗时 : 15.196 ms
|
|
18
|
+
解析耗时 : 129.142 ms
|
|
19
|
+
总耗时 : 535.163 ms
|
|
20
|
+
解析速度v1 : 16.7 MB/s
|
|
21
|
+
解析速度v2 : 14.9 MB/s
|
|
22
|
+
解析速度v3 : 8.8 MB/s
|
|
23
|
+
|
|
12
24
|
"""
|
|
13
25
|
|
|
26
|
+
import re
|
|
27
|
+
|
|
14
28
|
from .keywords import KeyWords
|
|
15
29
|
|
|
16
30
|
|
|
17
31
|
class ParseException(Exception):
|
|
18
32
|
"""Exception raised when SQL parsing fails."""
|
|
33
|
+
|
|
19
34
|
pass
|
|
20
35
|
|
|
21
36
|
|
|
@@ -121,6 +136,209 @@ def split_sql(sql: str) -> list[str]:
|
|
|
121
136
|
return result
|
|
122
137
|
|
|
123
138
|
|
|
139
|
+
def split_sql_v2(sql: str) -> list[str]:
    """
    Split multi-statement SQL by semicolons, handling comments and quotes.

    Semicolons inside quoted literals and block comments never terminate a
    statement.  Block comments (including nested ones) are copied into the
    output verbatim, while the text of ``--`` line comments is dropped and
    only the newline that ends them survives.  Returns the non-blank
    statements with their terminating semicolons removed.
    """
    if not sql.strip():
        return []

    statements: list[str] = []
    buf: list[str] = []
    pos = 0
    size = len(sql)

    while pos < size:
        ch = sql[pos]
        pair = sql[pos : pos + 2]

        # 1. Line comment: skip its body, keep the line break.
        if pair == "--":
            while pos < size and sql[pos] != "\n" and sql[pos] != "\r":
                pos += 1
            if pos < size:  # keep the newline/carriage return
                buf.append(sql[pos])
                pos += 1
            continue

        # 2. Block comment /* ... */ (nesting-aware), copied verbatim.
        if pair == "/*":
            buf.append("/*")
            pos += 2
            depth = 1
            while pos < size and depth > 0:
                inner = sql[pos : pos + 2]
                if inner == "/*":
                    depth += 1
                    buf.append("/*")
                    pos += 2
                elif inner == "*/":
                    depth -= 1
                    buf.append("*/")
                    pos += 2
                    if depth == 0:
                        break
                else:
                    buf.append(sql[pos])
                    pos += 1
            continue

        # 3. Quoted literal; a doubled quote is an escaped quote.
        if ch == "'" or ch == '"':
            quote = ch
            buf.append(ch)
            pos += 1
            while pos < size:
                cur = sql[pos]
                buf.append(cur)
                pos += 1
                if cur == quote:
                    if pos < size and sql[pos] == quote:
                        # '' or "" — still inside the literal.
                        buf.append(sql[pos])
                        pos += 1
                    else:
                        break
            continue

        # 4. Statement terminator.
        if ch == ";":
            piece = "".join(buf)
            if piece.strip():
                statements.append(piece)
            buf = []
            pos += 1
            continue

        # 5. Ordinary character.
        buf.append(ch)
        pos += 1

    # Flush whatever trails the last semicolon.
    leftover = "".join(buf)
    if leftover.strip():
        statements.append(leftover)

    return statements
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _try_parse_dollar_tag(text: str, pos: int) -> tuple[str, int] | None:
|
|
230
|
+
"""
|
|
231
|
+
从 text[pos] 开始尝试匹配美元引号开标签 $tag$。
|
|
232
|
+
返回 (tag, end_pos) 或 None。
|
|
233
|
+
"""
|
|
234
|
+
# 美元引号标签的合法字符:字母开头,字母/数字/下划线
|
|
235
|
+
_DOLLAR_TAG_RE = re.compile(r"\$([A-Za-z_][A-Za-z0-9_]*)?\$")
|
|
236
|
+
|
|
237
|
+
m = _DOLLAR_TAG_RE.match(text, pos)
|
|
238
|
+
if m:
|
|
239
|
+
tag = m.group(1) or ""
|
|
240
|
+
return tag, m.end()
|
|
241
|
+
return None
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def split_sql_v3(text: str) -> list[str]:
    """
    Lexer-based splitter, mirroring ``split_sql()`` in the Rust lib.rs.

    Semicolons inside single/double-quoted strings, ``--`` line comments,
    (nested) ``/* */`` block comments and dollar-quoted strings do not
    terminate a statement.  Each returned statement is stripped; blank
    pieces are dropped.
    """
    # Lexer modes (the Rust version uses an enum; strings read better here).
    mode = "normal"
    comment_depth = 0  # nesting level of block comments
    close_tag = ""     # cached closing marker of the active dollar quote

    pieces: list[str] = []
    start = 0          # index where the current statement begins
    idx = 0
    length = len(text)

    while idx < length:
        ch = text[idx]
        ahead = text[idx + 1] if idx + 1 < length else "\0"

        if mode == "normal":
            if ch == ";":
                piece = text[start:idx].strip()
                if piece:
                    pieces.append(piece)
                start = idx + 1
            elif ch == "'":
                mode = "squote"
            elif ch == '"':
                mode = "dquote"
            elif ch == "-" and ahead == "-":
                mode = "line"
                idx += 1
            elif ch == "/" and ahead == "*":
                mode = "block"
                comment_depth = 1
                idx += 1
            elif ch == "$":
                hit = _try_parse_dollar_tag(text, idx)
                if hit is not None:
                    tag, after = hit
                    close_tag = f"${tag}$"
                    mode = "dollar"
                    idx = after
                    continue  # idx already sits past the opening tag

        elif mode == "squote":
            if ch == "'" and ahead == "'":
                idx += 1          # doubled quote: still inside the literal
            elif ch == "\\" and ahead == "'":
                idx += 1          # backslash-escaped quote
            elif ch == "'":
                mode = "normal"

        elif mode == "dquote":
            if ch == '"' and ahead == '"':
                idx += 1          # doubled quote: still inside
            elif ch == '"':
                mode = "normal"

        elif mode == "line":
            if ch == "\n":
                mode = "normal"

        elif mode == "block":
            if ch == "/" and ahead == "*":
                comment_depth += 1
                idx += 1
            elif ch == "*" and ahead == "/":
                comment_depth -= 1
                if comment_depth == 0:
                    mode = "normal"
                idx += 1

        elif mode == "dollar":
            if ch == "$" and text[idx : idx + len(close_tag)] == close_tag:
                idx += len(close_tag)
                mode = "normal"
                close_tag = ""
                continue

        idx += 1

    # Flush the trailing statement (no terminating semicolon).
    tail = text[start:].strip()
    if tail:
        pieces.append(tail)

    return pieces
|
|
340
|
+
|
|
341
|
+
|
|
124
342
|
def trim_comment(sql: str) -> str:
|
|
125
343
|
"""
|
|
126
344
|
Remove single-line and multi-line comments from SQL.
|
|
@@ -226,7 +444,7 @@ def get_source_target_tables(sql: str) -> dict[str, list[str]] | None:
|
|
|
226
444
|
ParseException: If SQL contains multiple statements
|
|
227
445
|
|
|
228
446
|
Note:
|
|
229
|
-
TODO:
|
|
447
|
+
TODO: 不能识别join后面的 [hint] table_name
|
|
230
448
|
{
|
|
231
449
|
"source_tables": [(t1, 1), (t2, 2), (t3, 3)],
|
|
232
450
|
"target_tables": [(t4, 1)]
|
|
@@ -345,11 +563,6 @@ def get_source_target_tables(sql: str) -> dict[str, list[str]] | None:
|
|
|
345
563
|
return
|
|
346
564
|
|
|
347
565
|
|
|
348
|
-
# ============================================================================
|
|
349
|
-
# Private helper functions
|
|
350
|
-
# ============================================================================
|
|
351
|
-
|
|
352
|
-
|
|
353
566
|
def _trim_single_line_comment(sql: str) -> str:
|
|
354
567
|
"""删除单行注释"""
|
|
355
568
|
result = []
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from sqlh.core.helper import (
|
|
2
|
+
get_source_target_tables,
|
|
3
|
+
get_source_target_tables_v2,
|
|
4
|
+
split_sql,
|
|
5
|
+
split_sql_v2,
|
|
6
|
+
split_sql_v3,
|
|
7
|
+
trim_comment,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_split():
    """split_sql yields one entry per semicolon-terminated statement."""
    script = """
    SELECT * FROM t1;
    INSERT INTO t2 SELECT * FROM t1;
    """
    statements = split_sql(script)
    assert len(statements) == 2
    assert "SELECT * FROM t1" in statements[0]
    assert "INSERT INTO t2 SELECT * FROM t1" in statements[1]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_split_v2():
    """split_sql_v2 must ignore semicolons hidden in comments.

    The block comment, the ``-- okk`` line and the trailing ``-- ddd ;``
    line comment all contain ``;`` characters that must NOT split the
    script.  The original test only printed the result and could never
    fail; real assertions are added here.
    """
    sql = """/*select '12;', ; */
    -- okk
    SELECT * FROM t1; -- ddd ;
    INSERT INTO t2 SELECT * FROM t1;
    """
    result = split_sql_v2(sql)
    # Exactly two real statements; comment-embedded semicolons are ignored.
    assert len(result) == 2
    assert "SELECT * FROM t1" in result[0]
    assert "INSERT INTO t2 SELECT * FROM t1" in result[1]
    for stmt in result:
        print("--- SQL Statement ---")
        print(stmt)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_split_v3():
    """split_sql_v3 must ignore semicolons hidden in comments.

    Same fixture as the v2 test: every ``;`` inside a comment must be
    skipped by the lexer.  The original test only printed the result and
    could never fail; real assertions are added here.
    """
    sql = """/*select '12;', ; */
    -- okk
    SELECT * FROM t1; -- ddd ;
    INSERT INTO t2 SELECT * FROM t1;
    """
    result = split_sql_v3(sql)
    # Exactly two real statements; comment-embedded semicolons are ignored.
    assert len(result) == 2
    assert "SELECT * FROM t1" in result[0]
    assert "INSERT INTO t2 SELECT * FROM t1" in result[1]
    for stmt in result:
        print("--- SQL Statement ---")
        print(stmt)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_trim_comment():
    """trim_comment removes both line and block comment markers."""
    script = """
    -- This is a comment
    SELECT * FROM t1;
    /* Multi-line
    comment */
    INSERT INTO t2 SELECT * FROM t1;
    """
    cleaned = trim_comment(script)
    assert "--" not in cleaned
    assert "/*" not in cleaned
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def test_get_source_target_tables():
    """Lineage extraction reports both sides of an INSERT ... SELECT."""
    lineage = get_source_target_tables(
        "INSERT INTO dwd.user_dim SELECT * FROM ods.user;"
    )
    assert lineage is not None
    assert "ods.user" in lineage["source_tables"]
    assert "dwd.user_dim" in lineage["target_tables"]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_get_source_target_tables_v2():
    """v2 extraction should cope with a join hint like ``[broadcast]``.

    The original test only printed the result and could never fail; a
    minimal assertion is added so a regression to ``None`` is caught.
    """
    sql = "SELECT COUNT(*) FROM t2 JOIN [broadcast] t1 ON t1.c1 = t2.c2;"
    result = get_source_target_tables_v2(sql)
    # NOTE(review): exact element shape of source_tables is not pinned
    # here — confirm against the v2 return contract before tightening.
    assert result is not None
    for table in result["source_tables"]:
        print(f"Source Table: {table}")
|
|
@@ -3,9 +3,9 @@ from typing import Tuple
|
|
|
3
3
|
from sqlh import utils
|
|
4
4
|
|
|
5
5
|
# 读取目录或文件
|
|
6
|
-
sql_path = ""
|
|
6
|
+
sql_path = "/Users/dunett/codes/duperl/daas-migration/showyu_fastdata_backup_20260413"
|
|
7
7
|
sql_stmt_str = utils.read_sql_from_directory(sql_path)
|
|
8
|
-
|
|
8
|
+
# sql_stmt_str = """insert into t3 SELECT /*+edede */ COUNT(*) FROM t2 JOIN [broadcast] t1 ON t1.c1 = t2.c2;"""
|
|
9
9
|
|
|
10
10
|
def test_read_sql_from_directory():
|
|
11
11
|
import timeit
|
|
@@ -26,7 +26,7 @@ from pathlib import Path
|
|
|
26
26
|
from typing import List, Literal, Tuple, Union
|
|
27
27
|
|
|
28
28
|
from .core.graph import DagGraph, NodeNotFoundException
|
|
29
|
-
from .core.helper import
|
|
29
|
+
from .core.helper import split_sql, trim_comment, get_source_target_tables
|
|
30
30
|
|
|
31
31
|
SearchResult = Union[Tuple[List[str], DagGraph], NodeNotFoundException]
|
|
32
32
|
|
|
@@ -305,6 +305,10 @@ def visualize_dag(
|
|
|
305
305
|
) -> None:
|
|
306
306
|
import webbrowser
|
|
307
307
|
|
|
308
|
+
if dag_graph.empty:
|
|
309
|
+
print("DAG图为空, 无需生成可视化")
|
|
310
|
+
return
|
|
311
|
+
|
|
308
312
|
html_content = dag_graph.to_html(template_type=template_type)
|
|
309
313
|
|
|
310
314
|
with open(filename, "w", encoding="utf-8") as f:
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
from sqlh.core.helper import get_source_target_tables, split_sql, trim_comment
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def test_split():
|
|
5
|
-
"""Test SQL splitting functionality."""
|
|
6
|
-
sql = """
|
|
7
|
-
SELECT * FROM t1;
|
|
8
|
-
INSERT INTO t2 SELECT * FROM t1;
|
|
9
|
-
"""
|
|
10
|
-
result = split_sql(sql)
|
|
11
|
-
assert len(result) == 2
|
|
12
|
-
assert "SELECT * FROM t1" in result[0]
|
|
13
|
-
assert "INSERT INTO t2 SELECT * FROM t1" in result[1]
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def test_trim_comment():
|
|
17
|
-
"""Test comment removal."""
|
|
18
|
-
sql = """
|
|
19
|
-
-- This is a comment
|
|
20
|
-
SELECT * FROM t1;
|
|
21
|
-
/* Multi-line
|
|
22
|
-
comment */
|
|
23
|
-
INSERT INTO t2 SELECT * FROM t1;
|
|
24
|
-
"""
|
|
25
|
-
result = trim_comment(sql)
|
|
26
|
-
assert "--" not in result
|
|
27
|
-
assert "/*" not in result
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def test_get_source_target_tables():
|
|
31
|
-
"""Test source/target table extraction."""
|
|
32
|
-
sql = "INSERT INTO dwd.user_dim SELECT * FROM ods.user;"
|
|
33
|
-
result = get_source_target_tables(sql)
|
|
34
|
-
assert result is not None
|
|
35
|
-
assert "ods.user" in result["source_tables"]
|
|
36
|
-
assert "dwd.user_dim" in result["target_tables"]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|