sqlh 0.2.7__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlh-0.2.7 → sqlh-0.2.8}/PKG-INFO +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/pyproject.toml +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/__init__.py +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/cli.py +22 -6
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/graph.py +21 -11
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_utils.py +2 -2
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/utils.py +7 -7
- {sqlh-0.2.7 → sqlh-0.2.8}/README.md +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/.DS_Store +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/helper.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/keywords.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/static/dagre_template.html +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/static/mermaid_template.html +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_cli.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_graph.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_import.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_sqlhelper.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sqlh
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
|
|
5
5
|
Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
|
|
6
6
|
Maintainer: Perry DU
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import argparse
|
|
2
|
+
import json
|
|
2
3
|
import sys
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
@@ -18,8 +19,8 @@ from .utils import (
|
|
|
18
19
|
search_related_root_tables,
|
|
19
20
|
search_related_tables,
|
|
20
21
|
search_related_upstream_tables,
|
|
22
|
+
table_count,
|
|
21
23
|
visualize_dag,
|
|
22
|
-
table_count
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
|
|
@@ -80,6 +81,7 @@ def arg_parse():
|
|
|
80
81
|
search_direction.add_argument("--downstream", action="store_true", help="search downstream tables (dependents)")
|
|
81
82
|
search_direction.add_argument("--all", action="store_true", help="search both upstream and downstream tables")
|
|
82
83
|
search_parser.add_argument("-t", "--table", help="table name to search", required=True)
|
|
84
|
+
search_parser.add_argument("-d", "--depth", help="search depth", default=None, type=int)
|
|
83
85
|
search_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
|
|
84
86
|
|
|
85
87
|
# web 子命令
|
|
@@ -137,10 +139,10 @@ def main():
|
|
|
137
139
|
output = search_related_root_tables(sql_stmt_str, args.table)
|
|
138
140
|
sub_command_arg = "--root"
|
|
139
141
|
elif args.upstream:
|
|
140
|
-
output = search_related_upstream_tables(sql_stmt_str, args.table)
|
|
142
|
+
output = search_related_upstream_tables(sql_stmt_str, args.table, args.depth)
|
|
141
143
|
sub_command_arg = "--upstream"
|
|
142
144
|
elif args.downstream:
|
|
143
|
-
output = search_related_downstream_tables(sql_stmt_str, args.table)
|
|
145
|
+
output = search_related_downstream_tables(sql_stmt_str, args.table, args.depth)
|
|
144
146
|
sub_command_arg = "--downstream"
|
|
145
147
|
elif args.all:
|
|
146
148
|
output = search_related_tables(sql_stmt_str, args.table)
|
|
@@ -161,7 +163,21 @@ def main():
|
|
|
161
163
|
print(f"open web page: {html_file_path}")
|
|
162
164
|
visualize_dag(get_all_dag(sql_stmt_str), template_type="dagre", filename=html_file_path)
|
|
163
165
|
return
|
|
164
|
-
|
|
166
|
+
|
|
165
167
|
elif args.command == "table-count":
|
|
166
|
-
|
|
167
|
-
|
|
168
|
+
table_count_lst = table_count(sql_stmt_str, args.table)
|
|
169
|
+
if args.output_format == "text":
|
|
170
|
+
for table, count in table_count_lst:
|
|
171
|
+
print(f"{table}: {count}")
|
|
172
|
+
|
|
173
|
+
elif args.output_format == "json":
|
|
174
|
+
result = {
|
|
175
|
+
"status": "ok",
|
|
176
|
+
"command": "table-count",
|
|
177
|
+
"data": table_count_lst,
|
|
178
|
+
"meta": {"table_count": len(table_count_lst)},
|
|
179
|
+
}
|
|
180
|
+
print(json.dumps(result, indent=2, ensure_ascii=False))
|
|
181
|
+
else:
|
|
182
|
+
print(f"Error: Not Supported output format: {args.output_format}")
|
|
183
|
+
sys.exit(1)
|
|
@@ -267,12 +267,13 @@ class DagGraph:
|
|
|
267
267
|
|
|
268
268
|
return False
|
|
269
269
|
|
|
270
|
-
def find_upstream(self, node: str) -> FindResult:
|
|
270
|
+
def find_upstream(self, node: str, depth: int | None = None) -> FindResult:
|
|
271
271
|
"""
|
|
272
272
|
查找所有上游依赖的边
|
|
273
273
|
|
|
274
274
|
Args:
|
|
275
275
|
node: 目标节点
|
|
276
|
+
depth: 最大深度,None 表示全链
|
|
276
277
|
|
|
277
278
|
Returns:
|
|
278
279
|
上游边的集合
|
|
@@ -280,12 +281,17 @@ class DagGraph:
|
|
|
280
281
|
if node not in self.__nodes:
|
|
281
282
|
return NodeNotFoundException(f"节点不存在:{node}")
|
|
282
283
|
|
|
283
|
-
queue = deque([node])
|
|
284
|
+
queue = deque([(node, 0)]) # (node, current_depth)
|
|
284
285
|
visited = set([node])
|
|
285
286
|
all_relations = []
|
|
286
287
|
|
|
287
288
|
while queue:
|
|
288
|
-
current = queue.popleft()
|
|
289
|
+
# current = queue.popleft()
|
|
290
|
+
current, current_depth = queue.popleft()
|
|
291
|
+
# 检查深度限制
|
|
292
|
+
if depth is not None and current_depth >= depth:
|
|
293
|
+
continue
|
|
294
|
+
|
|
289
295
|
# 使用反向邻接表直接查找上游节点,提升性能
|
|
290
296
|
predecessors = self.__reverse_adjacency_list.get(current, set())
|
|
291
297
|
for predecessor in predecessors:
|
|
@@ -293,15 +299,16 @@ class DagGraph:
|
|
|
293
299
|
all_relations.append(edge)
|
|
294
300
|
if predecessor not in visited:
|
|
295
301
|
visited.add(predecessor)
|
|
296
|
-
queue.append(predecessor)
|
|
302
|
+
queue.append((predecessor, current_depth + 1))
|
|
297
303
|
return DagGraph(edges=all_relations)
|
|
298
304
|
|
|
299
|
-
def find_downstream(self, node: str) -> FindResult:
|
|
305
|
+
def find_downstream(self, node: str, depth: int | None = None) -> FindResult:
|
|
300
306
|
"""
|
|
301
307
|
查找所有下游依赖的边
|
|
302
308
|
|
|
303
309
|
Args:
|
|
304
310
|
node: 起始节点
|
|
311
|
+
depth: 最大深度,None 表示全链
|
|
305
312
|
|
|
306
313
|
Returns:
|
|
307
314
|
下游边的集合
|
|
@@ -309,12 +316,17 @@ class DagGraph:
|
|
|
309
316
|
if node not in self.__nodes:
|
|
310
317
|
return NodeNotFoundException(f"节点不存在:{node}")
|
|
311
318
|
|
|
312
|
-
queue = deque([node])
|
|
319
|
+
queue = deque([(node, 0)]) # (node, current_depth)
|
|
313
320
|
visited = set([node])
|
|
314
321
|
all_relations = []
|
|
315
322
|
|
|
316
323
|
while queue:
|
|
317
|
-
current = queue.popleft()
|
|
324
|
+
current, current_depth = queue.popleft()
|
|
325
|
+
|
|
326
|
+
# 检查深度限制
|
|
327
|
+
if depth is not None and current_depth >= depth:
|
|
328
|
+
continue
|
|
329
|
+
|
|
318
330
|
# 使用邻接表直接查找下游节点,提升性能
|
|
319
331
|
neighbors = self.__adjacency_list.get(current, set())
|
|
320
332
|
for neighbor in neighbors:
|
|
@@ -322,7 +334,7 @@ class DagGraph:
|
|
|
322
334
|
all_relations.append(edge)
|
|
323
335
|
if neighbor not in visited:
|
|
324
336
|
visited.add(neighbor)
|
|
325
|
-
queue.append(neighbor)
|
|
337
|
+
queue.append((neighbor, current_depth + 1))
|
|
326
338
|
|
|
327
339
|
return DagGraph(edges=all_relations)
|
|
328
340
|
|
|
@@ -377,9 +389,7 @@ class DagGraph:
|
|
|
377
389
|
|
|
378
390
|
# 替换模板变量
|
|
379
391
|
html_content = template.safe_substitute(
|
|
380
|
-
title="DAG Visualization",
|
|
381
|
-
mermaid_content=mermaid_content,
|
|
382
|
-
lineage_data=lineage_data
|
|
392
|
+
title="DAG Visualization", mermaid_content=mermaid_content, lineage_data=lineage_data
|
|
383
393
|
)
|
|
384
394
|
|
|
385
395
|
return html_content
|
|
@@ -26,7 +26,7 @@ def test_get_all_root_tables():
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def test_search_related_upstream_tables():
|
|
29
|
-
a = utils.search_related_upstream_tables(sql_stmt_str, "
|
|
29
|
+
a = utils.search_related_upstream_tables(sql_stmt_str, "dws.xxx")
|
|
30
30
|
if isinstance(a, Tuple):
|
|
31
31
|
print(utils.list_command_text(a[0]))
|
|
32
32
|
else:
|
|
@@ -88,5 +88,5 @@ def test_search_related_root_tables():
|
|
|
88
88
|
|
|
89
89
|
def test_table_count():
|
|
90
90
|
print(utils.table_count(sql_stmt_str))
|
|
91
|
-
for table, count in utils.table_count(sql_stmt_str,
|
|
91
|
+
for table, count in utils.table_count(sql_stmt_str, "ods_hive.ods_order"):
|
|
92
92
|
print(f"{table}: {count}")
|
|
@@ -198,7 +198,7 @@ def search_related_root_tables(sql_stmt_str: str, target_table: str) -> SearchRe
|
|
|
198
198
|
return related_graph
|
|
199
199
|
|
|
200
200
|
|
|
201
|
-
def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
201
|
+
def search_related_upstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
202
202
|
"""
|
|
203
203
|
从目标表向上追溯,获取所有上游依赖表(包含所有中间表)
|
|
204
204
|
|
|
@@ -215,7 +215,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
|
|
|
215
215
|
(上游表列表, 上游依赖子图)
|
|
216
216
|
"""
|
|
217
217
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
218
|
-
related_graph = dg.find_upstream(target_table)
|
|
218
|
+
related_graph = dg.find_upstream(target_table, depth=depth)
|
|
219
219
|
if isinstance(related_graph, DagGraph):
|
|
220
220
|
if related_graph.empty:
|
|
221
221
|
return [], DagGraph()
|
|
@@ -230,7 +230,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
|
|
|
230
230
|
return related_graph
|
|
231
231
|
|
|
232
232
|
|
|
233
|
-
def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
233
|
+
def search_related_downstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
234
234
|
"""
|
|
235
235
|
从目标表向下追溯,获取所有下游依赖表(包含所有中间表)
|
|
236
236
|
|
|
@@ -247,7 +247,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
|
|
|
247
247
|
(下游表列表, 下游依赖子图)
|
|
248
248
|
"""
|
|
249
249
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
250
|
-
related_graph = dg.find_downstream(target_table)
|
|
250
|
+
related_graph = dg.find_downstream(target_table, depth=depth)
|
|
251
251
|
if isinstance(related_graph, DagGraph):
|
|
252
252
|
if related_graph.empty:
|
|
253
253
|
return [], DagGraph()
|
|
@@ -262,7 +262,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
|
|
|
262
262
|
return related_graph
|
|
263
263
|
|
|
264
264
|
|
|
265
|
-
def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
265
|
+
def search_related_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
266
266
|
"""
|
|
267
267
|
从目标表双向追溯,获取所有相关表(上游+下游,不包含自身)
|
|
268
268
|
|
|
@@ -279,9 +279,9 @@ def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
|
279
279
|
(相关表列表, 完整依赖子图)
|
|
280
280
|
"""
|
|
281
281
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
282
|
-
related_graph_upstream = dg.find_upstream(target_table)
|
|
282
|
+
related_graph_upstream = dg.find_upstream(target_table, depth=depth)
|
|
283
283
|
if isinstance(related_graph_upstream, DagGraph):
|
|
284
|
-
related_graph_downstream = dg.find_downstream(target_table)
|
|
284
|
+
related_graph_downstream = dg.find_downstream(target_table, depth=depth)
|
|
285
285
|
if isinstance(related_graph_downstream, DagGraph):
|
|
286
286
|
new_graph = related_graph_downstream.union(related_graph_upstream)
|
|
287
287
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|