sqlh 0.2.7__tar.gz → 0.2.8__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sqlh
3
- Version: 0.2.7
3
+ Version: 0.2.8
4
4
  Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
5
5
  Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
6
6
  Maintainer: Perry DU
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sqlh"
3
- version = "0.2.7"
3
+ version = "0.2.8"
4
4
  maintainers = [
5
5
  {name = "Perry DU", email = "duneite@gmail.com"}
6
6
  ]
@@ -14,7 +14,7 @@ from .utils import (
14
14
  table_count
15
15
  )
16
16
 
17
- __version__ = "0.2.7"
17
+ __version__ = "0.2.8"
18
18
 
19
19
  __all__ = [
20
20
  "split_sql",
@@ -1,4 +1,5 @@
1
1
  import argparse
2
+ import json
2
3
  import sys
3
4
  from pathlib import Path
4
5
 
@@ -18,8 +19,8 @@ from .utils import (
18
19
  search_related_root_tables,
19
20
  search_related_tables,
20
21
  search_related_upstream_tables,
22
+ table_count,
21
23
  visualize_dag,
22
- table_count
23
24
  )
24
25
 
25
26
 
@@ -80,6 +81,7 @@ def arg_parse():
80
81
  search_direction.add_argument("--downstream", action="store_true", help="search downstream tables (dependents)")
81
82
  search_direction.add_argument("--all", action="store_true", help="search both upstream and downstream tables")
82
83
  search_parser.add_argument("-t", "--table", help="table name to search", required=True)
84
+ search_parser.add_argument("-d", "--depth", help="search depth", default=None, type=int)
83
85
  search_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
84
86
 
85
87
  # web 子命令
@@ -137,10 +139,10 @@ def main():
137
139
  output = search_related_root_tables(sql_stmt_str, args.table)
138
140
  sub_command_arg = "--root"
139
141
  elif args.upstream:
140
- output = search_related_upstream_tables(sql_stmt_str, args.table)
142
+ output = search_related_upstream_tables(sql_stmt_str, args.table, args.depth)
141
143
  sub_command_arg = "--upstream"
142
144
  elif args.downstream:
143
- output = search_related_downstream_tables(sql_stmt_str, args.table)
145
+ output = search_related_downstream_tables(sql_stmt_str, args.table, args.depth)
144
146
  sub_command_arg = "--downstream"
145
147
  elif args.all:
146
148
  output = search_related_tables(sql_stmt_str, args.table)
@@ -161,7 +163,21 @@ def main():
161
163
  print(f"open web page: {html_file_path}")
162
164
  visualize_dag(get_all_dag(sql_stmt_str), template_type="dagre", filename=html_file_path)
163
165
  return
164
-
166
+
165
167
  elif args.command == "table-count":
166
- for table, count in table_count(sql_stmt_str, args.table):
167
- print(f"{table}: {count}")
168
+ table_count_lst = table_count(sql_stmt_str, args.table)
169
+ if args.output_format == "text":
170
+ for table, count in table_count_lst:
171
+ print(f"{table}: {count}")
172
+
173
+ elif args.output_format == "json":
174
+ result = {
175
+ "status": "ok",
176
+ "command": "table-count",
177
+ "data": table_count_lst,
178
+ "meta": {"table_count": len(table_count_lst)},
179
+ }
180
+ print(json.dumps(result, indent=2, ensure_ascii=False))
181
+ else:
182
+ print(f"Error: Not Supported output format: {args.output_format}")
183
+ sys.exit(1)
@@ -267,12 +267,13 @@ class DagGraph:
267
267
 
268
268
  return False
269
269
 
270
- def find_upstream(self, node: str) -> FindResult:
270
+ def find_upstream(self, node: str, depth: int | None = None) -> FindResult:
271
271
  """
272
272
  查找所有上游依赖的边
273
273
 
274
274
  Args:
275
275
  node: 目标节点
276
+ depth: 最大深度,None 表示全链
276
277
 
277
278
  Returns:
278
279
  上游边的集合
@@ -280,12 +281,17 @@ class DagGraph:
280
281
  if node not in self.__nodes:
281
282
  return NodeNotFoundException(f"节点不存在:{node}")
282
283
 
283
- queue = deque([node])
284
+ queue = deque([(node, 0)]) # (node, current_depth)
284
285
  visited = set([node])
285
286
  all_relations = []
286
287
 
287
288
  while queue:
288
- current = queue.popleft()
289
+ # current = queue.popleft()
290
+ current, current_depth = queue.popleft()
291
+ # 检查深度限制
292
+ if depth is not None and current_depth >= depth:
293
+ continue
294
+
289
295
  # 使用反向邻接表直接查找上游节点,提升性能
290
296
  predecessors = self.__reverse_adjacency_list.get(current, set())
291
297
  for predecessor in predecessors:
@@ -293,15 +299,16 @@ class DagGraph:
293
299
  all_relations.append(edge)
294
300
  if predecessor not in visited:
295
301
  visited.add(predecessor)
296
- queue.append(predecessor)
302
+ queue.append((predecessor, current_depth + 1))
297
303
  return DagGraph(edges=all_relations)
298
304
 
299
- def find_downstream(self, node: str) -> FindResult:
305
+ def find_downstream(self, node: str, depth: int | None = None) -> FindResult:
300
306
  """
301
307
  查找所有下游依赖的边
302
308
 
303
309
  Args:
304
310
  node: 起始节点
311
+ depth: 最大深度,None 表示全链
305
312
 
306
313
  Returns:
307
314
  下游边的集合
@@ -309,12 +316,17 @@ class DagGraph:
309
316
  if node not in self.__nodes:
310
317
  return NodeNotFoundException(f"节点不存在:{node}")
311
318
 
312
- queue = deque([node])
319
+ queue = deque([(node, 0)]) # (node, current_depth)
313
320
  visited = set([node])
314
321
  all_relations = []
315
322
 
316
323
  while queue:
317
- current = queue.popleft()
324
+ current, current_depth = queue.popleft()
325
+
326
+ # 检查深度限制
327
+ if depth is not None and current_depth >= depth:
328
+ continue
329
+
318
330
  # 使用邻接表直接查找下游节点,提升性能
319
331
  neighbors = self.__adjacency_list.get(current, set())
320
332
  for neighbor in neighbors:
@@ -322,7 +334,7 @@ class DagGraph:
322
334
  all_relations.append(edge)
323
335
  if neighbor not in visited:
324
336
  visited.add(neighbor)
325
- queue.append(neighbor)
337
+ queue.append((neighbor, current_depth + 1))
326
338
 
327
339
  return DagGraph(edges=all_relations)
328
340
 
@@ -377,9 +389,7 @@ class DagGraph:
377
389
 
378
390
  # 替换模板变量
379
391
  html_content = template.safe_substitute(
380
- title="DAG Visualization",
381
- mermaid_content=mermaid_content,
382
- lineage_data=lineage_data
392
+ title="DAG Visualization", mermaid_content=mermaid_content, lineage_data=lineage_data
383
393
  )
384
394
 
385
395
  return html_content
@@ -26,7 +26,7 @@ def test_get_all_root_tables():
26
26
 
27
27
 
28
28
  def test_search_related_upstream_tables():
29
- a = utils.search_related_upstream_tables(sql_stmt_str, "ods_hive.ods_order")
29
+ a = utils.search_related_upstream_tables(sql_stmt_str, "dws.xxx")
30
30
  if isinstance(a, Tuple):
31
31
  print(utils.list_command_text(a[0]))
32
32
  else:
@@ -88,5 +88,5 @@ def test_search_related_root_tables():
88
88
 
89
89
  def test_table_count():
90
90
  print(utils.table_count(sql_stmt_str))
91
- for table, count in utils.table_count(sql_stmt_str, 'ods_hive.ods_order'):
91
+ for table, count in utils.table_count(sql_stmt_str, "ods_hive.ods_order"):
92
92
  print(f"{table}: {count}")
@@ -198,7 +198,7 @@ def search_related_root_tables(sql_stmt_str: str, target_table: str) -> SearchRe
198
198
  return related_graph
199
199
 
200
200
 
201
- def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
201
+ def search_related_upstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
202
202
  """
203
203
  从目标表向上追溯,获取所有上游依赖表(包含所有中间表)
204
204
 
@@ -215,7 +215,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
215
215
  (上游表列表, 上游依赖子图)
216
216
  """
217
217
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
218
- related_graph = dg.find_upstream(target_table)
218
+ related_graph = dg.find_upstream(target_table, depth=depth)
219
219
  if isinstance(related_graph, DagGraph):
220
220
  if related_graph.empty:
221
221
  return [], DagGraph()
@@ -230,7 +230,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
230
230
  return related_graph
231
231
 
232
232
 
233
- def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
233
+ def search_related_downstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
234
234
  """
235
235
  从目标表向下追溯,获取所有下游依赖表(包含所有中间表)
236
236
 
@@ -247,7 +247,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
247
247
  (下游表列表, 下游依赖子图)
248
248
  """
249
249
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
250
- related_graph = dg.find_downstream(target_table)
250
+ related_graph = dg.find_downstream(target_table, depth=depth)
251
251
  if isinstance(related_graph, DagGraph):
252
252
  if related_graph.empty:
253
253
  return [], DagGraph()
@@ -262,7 +262,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
262
262
  return related_graph
263
263
 
264
264
 
265
- def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
265
+ def search_related_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
266
266
  """
267
267
  从目标表双向追溯,获取所有相关表(上游+下游,不包含自身)
268
268
 
@@ -279,9 +279,9 @@ def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
279
279
  (相关表列表, 完整依赖子图)
280
280
  """
281
281
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
282
- related_graph_upstream = dg.find_upstream(target_table)
282
+ related_graph_upstream = dg.find_upstream(target_table, depth=depth)
283
283
  if isinstance(related_graph_upstream, DagGraph):
284
- related_graph_downstream = dg.find_downstream(target_table)
284
+ related_graph_downstream = dg.find_downstream(target_table, depth=depth)
285
285
  if isinstance(related_graph_downstream, DagGraph):
286
286
  new_graph = related_graph_downstream.union(related_graph_upstream)
287
287
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes