sqlh 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sqlh
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
5
5
  Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
6
6
  Maintainer: Perry DU
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sqlh"
3
- version = "0.2.6"
3
+ version = "0.2.8"
4
4
  maintainers = [
5
5
  {name = "Perry DU", email = "duneite@gmail.com"}
6
6
  ]
@@ -11,9 +11,10 @@ from .utils import (
11
11
  search_related_tables,
12
12
  search_related_upstream_tables,
13
13
  visualize_dag,
14
+ table_count
14
15
  )
15
16
 
16
- __version__ = "0.2.6"
17
+ __version__ = "0.2.8"
17
18
 
18
19
  __all__ = [
19
20
  "split_sql",
@@ -29,4 +30,5 @@ __all__ = [
29
30
  "search_related_tables",
30
31
  "search_command_json",
31
32
  "visualize_dag",
33
+ "table_count"
32
34
  ]
@@ -1,4 +1,5 @@
1
1
  import argparse
2
+ import json
2
3
  import sys
3
4
  from pathlib import Path
4
5
 
@@ -18,6 +19,7 @@ from .utils import (
18
19
  search_related_root_tables,
19
20
  search_related_tables,
20
21
  search_related_upstream_tables,
22
+ table_count,
21
23
  visualize_dag,
22
24
  )
23
25
 
@@ -79,6 +81,7 @@ def arg_parse():
79
81
  search_direction.add_argument("--downstream", action="store_true", help="search downstream tables (dependents)")
80
82
  search_direction.add_argument("--all", action="store_true", help="search both upstream and downstream tables")
81
83
  search_parser.add_argument("-t", "--table", help="table name to search", required=True)
84
+ search_parser.add_argument("-d", "--depth", help="search depth", default=None, type=int)
82
85
  search_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
83
86
 
84
87
  # web 子命令
@@ -91,6 +94,15 @@ def arg_parse():
91
94
  web_parser.add_argument("--html-path", help="html file path for visualization", default=".")
92
95
  web_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
93
96
 
97
+ # table-count 子命令
98
+ table_count_parser = subparsers.add_parser(
99
+ "table-count",
100
+ parents=[parent_parser],
101
+ help="count the number of tables",
102
+ add_help=False,
103
+ )
104
+ table_count_parser.add_argument("-t", "--table", help="table name to search")
105
+ table_count_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
94
106
  return parser.parse_args()
95
107
 
96
108
 
@@ -127,10 +139,10 @@ def main():
127
139
  output = search_related_root_tables(sql_stmt_str, args.table)
128
140
  sub_command_arg = "--root"
129
141
  elif args.upstream:
130
- output = search_related_upstream_tables(sql_stmt_str, args.table)
142
+ output = search_related_upstream_tables(sql_stmt_str, args.table, args.depth)
131
143
  sub_command_arg = "--upstream"
132
144
  elif args.downstream:
133
- output = search_related_downstream_tables(sql_stmt_str, args.table)
145
+ output = search_related_downstream_tables(sql_stmt_str, args.table, args.depth)
134
146
  sub_command_arg = "--downstream"
135
147
  elif args.all:
136
148
  output = search_related_tables(sql_stmt_str, args.table)
@@ -151,3 +163,21 @@ def main():
151
163
  print(f"open web page: {html_file_path}")
152
164
  visualize_dag(get_all_dag(sql_stmt_str), template_type="dagre", filename=html_file_path)
153
165
  return
166
+
167
+ elif args.command == "table-count":
168
+ table_count_lst = table_count(sql_stmt_str, args.table)
169
+ if args.output_format == "text":
170
+ for table, count in table_count_lst:
171
+ print(f"{table}: {count}")
172
+
173
+ elif args.output_format == "json":
174
+ result = {
175
+ "status": "ok",
176
+ "command": "table-count",
177
+ "data": table_count_lst,
178
+ "meta": {"table_count": len(table_count_lst)},
179
+ }
180
+ print(json.dumps(result, indent=2, ensure_ascii=False))
181
+ else:
182
+ print(f"Error: Not Supported output format: {args.output_format}")
183
+ sys.exit(1)
@@ -267,12 +267,13 @@ class DagGraph:
267
267
 
268
268
  return False
269
269
 
270
- def find_upstream(self, node: str) -> FindResult:
270
+ def find_upstream(self, node: str, depth: int | None = None) -> FindResult:
271
271
  """
272
272
  查找所有上游依赖的边
273
273
 
274
274
  Args:
275
275
  node: 目标节点
276
+ depth: 最大深度,None 表示全链
276
277
 
277
278
  Returns:
278
279
  上游边的集合
@@ -280,12 +281,17 @@ class DagGraph:
280
281
  if node not in self.__nodes:
281
282
  return NodeNotFoundException(f"节点不存在:{node}")
282
283
 
283
- queue = deque([node])
284
+ queue = deque([(node, 0)]) # (node, current_depth)
284
285
  visited = set([node])
285
286
  all_relations = []
286
287
 
287
288
  while queue:
288
- current = queue.popleft()
289
+ # current = queue.popleft()
290
+ current, current_depth = queue.popleft()
291
+ # 检查深度限制
292
+ if depth is not None and current_depth >= depth:
293
+ continue
294
+
289
295
  # 使用反向邻接表直接查找上游节点,提升性能
290
296
  predecessors = self.__reverse_adjacency_list.get(current, set())
291
297
  for predecessor in predecessors:
@@ -293,15 +299,16 @@ class DagGraph:
293
299
  all_relations.append(edge)
294
300
  if predecessor not in visited:
295
301
  visited.add(predecessor)
296
- queue.append(predecessor)
302
+ queue.append((predecessor, current_depth + 1))
297
303
  return DagGraph(edges=all_relations)
298
304
 
299
- def find_downstream(self, node: str) -> FindResult:
305
+ def find_downstream(self, node: str, depth: int | None = None) -> FindResult:
300
306
  """
301
307
  查找所有下游依赖的边
302
308
 
303
309
  Args:
304
310
  node: 起始节点
311
+ depth: 最大深度,None 表示全链
305
312
 
306
313
  Returns:
307
314
  下游边的集合
@@ -309,12 +316,17 @@ class DagGraph:
309
316
  if node not in self.__nodes:
310
317
  return NodeNotFoundException(f"节点不存在:{node}")
311
318
 
312
- queue = deque([node])
319
+ queue = deque([(node, 0)]) # (node, current_depth)
313
320
  visited = set([node])
314
321
  all_relations = []
315
322
 
316
323
  while queue:
317
- current = queue.popleft()
324
+ current, current_depth = queue.popleft()
325
+
326
+ # 检查深度限制
327
+ if depth is not None and current_depth >= depth:
328
+ continue
329
+
318
330
  # 使用邻接表直接查找下游节点,提升性能
319
331
  neighbors = self.__adjacency_list.get(current, set())
320
332
  for neighbor in neighbors:
@@ -322,7 +334,7 @@ class DagGraph:
322
334
  all_relations.append(edge)
323
335
  if neighbor not in visited:
324
336
  visited.add(neighbor)
325
- queue.append(neighbor)
337
+ queue.append((neighbor, current_depth + 1))
326
338
 
327
339
  return DagGraph(edges=all_relations)
328
340
 
@@ -377,9 +389,7 @@ class DagGraph:
377
389
 
378
390
  # 替换模板变量
379
391
  html_content = template.safe_substitute(
380
- title="DAG Visualization",
381
- mermaid_content=mermaid_content,
382
- lineage_data=lineage_data
392
+ title="DAG Visualization", mermaid_content=mermaid_content, lineage_data=lineage_data
383
393
  )
384
394
 
385
395
  return html_content
@@ -26,7 +26,7 @@ def test_get_all_root_tables():
26
26
 
27
27
 
28
28
  def test_search_related_upstream_tables():
29
- a = utils.search_related_upstream_tables(sql_stmt_str, "dws.dws_cy_cust_ltst_active_rec")
29
+ a = utils.search_related_upstream_tables(sql_stmt_str, "dws.xxx")
30
30
  if isinstance(a, Tuple):
31
31
  print(utils.list_command_text(a[0]))
32
32
  else:
@@ -34,7 +34,7 @@ def test_search_related_upstream_tables():
34
34
 
35
35
 
36
36
  def test_search_related_downstream_tables():
37
- a = utils.search_related_downstream_tables(sql_stmt_str, "ods.ods_plr_dwm_hr_xy_shop_performance_all")
37
+ a = utils.search_related_downstream_tables(sql_stmt_str, "ods_hive.ods_order")
38
38
  if isinstance(a, Tuple):
39
39
  print(utils.list_command_text(a[0]))
40
40
  else:
@@ -42,7 +42,7 @@ def test_search_related_downstream_tables():
42
42
 
43
43
 
44
44
  def test_search_related_tables():
45
- a = utils.search_related_tables(sql_stmt_str, "dim.dim_shopinfo")
45
+ a = utils.search_related_tables(sql_stmt_str, "ods_hive.ods_order")
46
46
  if isinstance(a, Tuple):
47
47
  print(utils.list_command_text(a[0]))
48
48
  else:
@@ -78,9 +78,15 @@ def test_search_command_json():
78
78
 
79
79
 
80
80
  def test_search_related_root_tables():
81
- output = utils.search_related_root_tables(sql_stmt_str, "ods_hive.ods_ec_staff")
81
+ output = utils.search_related_root_tables(sql_stmt_str, "ods_hive.ods_order")
82
82
  # print(utils.list_command_text(output[0]))
83
83
  if isinstance(output, Tuple):
84
84
  print(utils.list_command_text(output[0]))
85
85
  else:
86
86
  print(output)
87
+
88
+
89
def test_table_count():
    """table_count: full-scan and single-table modes both return (table, count) pairs."""
    # Full-scan mode: every table with its occurrence count, sorted descending.
    all_counts = utils.table_count(sql_stmt_str)
    assert isinstance(all_counts, list)
    counts = [count for _, count in all_counts]
    assert counts == sorted(counts, reverse=True)
    print(all_counts)

    # Single-table mode: exactly one pair, keyed by the requested table.
    single = utils.table_count(sql_stmt_str, "ods_hive.ods_order")
    assert len(single) == 1
    assert single[0][0] == "ods_hive.ods_order"
    for table, count in single:
        print(f"{table}: {count}")
@@ -21,6 +21,7 @@ Example:
21
21
 
22
22
  import json
23
23
  import re
24
+ from collections import Counter
24
25
  from pathlib import Path
25
26
  from typing import List, Literal, Tuple, Union
26
27
 
@@ -197,7 +198,7 @@ def search_related_root_tables(sql_stmt_str: str, target_table: str) -> SearchRe
197
198
  return related_graph
198
199
 
199
200
 
200
- def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
201
+ def search_related_upstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
201
202
  """
202
203
  从目标表向上追溯,获取所有上游依赖表(包含所有中间表)
203
204
 
@@ -214,7 +215,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
214
215
  (上游表列表, 上游依赖子图)
215
216
  """
216
217
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
217
- related_graph = dg.find_upstream(target_table)
218
+ related_graph = dg.find_upstream(target_table, depth=depth)
218
219
  if isinstance(related_graph, DagGraph):
219
220
  if related_graph.empty:
220
221
  return [], DagGraph()
@@ -229,7 +230,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
229
230
  return related_graph
230
231
 
231
232
 
232
- def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
233
+ def search_related_downstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
233
234
  """
234
235
  从目标表向下追溯,获取所有下游依赖表(包含所有中间表)
235
236
 
@@ -246,7 +247,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
246
247
  (下游表列表, 下游依赖子图)
247
248
  """
248
249
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
249
- related_graph = dg.find_downstream(target_table)
250
+ related_graph = dg.find_downstream(target_table, depth=depth)
250
251
  if isinstance(related_graph, DagGraph):
251
252
  if related_graph.empty:
252
253
  return [], DagGraph()
@@ -261,7 +262,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
261
262
  return related_graph
262
263
 
263
264
 
264
- def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
265
+ def search_related_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
265
266
  """
266
267
  从目标表双向追溯,获取所有相关表(上游+下游,不包含自身)
267
268
 
@@ -278,9 +279,9 @@ def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
278
279
  (相关表列表, 完整依赖子图)
279
280
  """
280
281
  _, _, dg = __build_tables_and_graph(sql_stmt_str)
281
- related_graph_upstream = dg.find_upstream(target_table)
282
+ related_graph_upstream = dg.find_upstream(target_table, depth=depth)
282
283
  if isinstance(related_graph_upstream, DagGraph):
283
- related_graph_downstream = dg.find_downstream(target_table)
284
+ related_graph_downstream = dg.find_downstream(target_table, depth=depth)
284
285
  if isinstance(related_graph_downstream, DagGraph):
285
286
  new_graph = related_graph_downstream.union(related_graph_upstream)
286
287
 
@@ -363,3 +364,15 @@ def search_command_text(search_result: SearchResult) -> str:
363
364
  return "- " + "\n- ".join(sorted(related_tables))
364
365
  else:
365
366
  return str(search_result)
367
+
368
+
369
+ def table_count(sql_stmt_str: str, table_name: str | None = None):
370
+ tables = get_all_tables(sql_stmt_str)
371
+ counter = Counter()
372
+ if table_name:
373
+ counter[table_name] = sql_stmt_str.count(table_name.lower())
374
+ return [(table_name, counter[table_name])]
375
+ else:
376
+ for table in tables:
377
+ counter[table] += sql_stmt_str.count(table.lower())
378
+ return [(table, counter[table]) for table in sorted(counter, key=lambda x: counter[x], reverse=True)]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes