sqlh 0.2.7__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlh-0.2.7 → sqlh-0.2.8}/PKG-INFO +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/pyproject.toml +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/__init__.py +1 -1
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/cli.py +22 -6
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/graph.py +21 -11
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_utils.py +2 -2
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/utils.py +7 -7
- {sqlh-0.2.7 → sqlh-0.2.8}/README.md +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/.DS_Store +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/helper.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/core/keywords.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/static/dagre_template.html +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/static/mermaid_template.html +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_cli.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_graph.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_import.py +0 -0
- {sqlh-0.2.7 → sqlh-0.2.8}/sqlh/tests/test_sqlhelper.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sqlh
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.2.8
|
|
4
4
|
Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
|
|
5
5
|
Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
|
|
6
6
|
Maintainer: Perry DU
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import argparse
|
|
2
|
+
import json
|
|
2
3
|
import sys
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
@@ -18,8 +19,8 @@ from .utils import (
|
|
|
18
19
|
search_related_root_tables,
|
|
19
20
|
search_related_tables,
|
|
20
21
|
search_related_upstream_tables,
|
|
22
|
+
table_count,
|
|
21
23
|
visualize_dag,
|
|
22
|
-
table_count
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
|
|
@@ -80,6 +81,7 @@ def arg_parse():
|
|
|
80
81
|
search_direction.add_argument("--downstream", action="store_true", help="search downstream tables (dependents)")
|
|
81
82
|
search_direction.add_argument("--all", action="store_true", help="search both upstream and downstream tables")
|
|
82
83
|
search_parser.add_argument("-t", "--table", help="table name to search", required=True)
|
|
84
|
+
search_parser.add_argument("-d", "--depth", help="search depth", default=None, type=int)
|
|
83
85
|
search_parser.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")
|
|
84
86
|
|
|
85
87
|
# web 子命令
|
|
@@ -137,10 +139,10 @@ def main():
|
|
|
137
139
|
output = search_related_root_tables(sql_stmt_str, args.table)
|
|
138
140
|
sub_command_arg = "--root"
|
|
139
141
|
elif args.upstream:
|
|
140
|
-
output = search_related_upstream_tables(sql_stmt_str, args.table)
|
|
142
|
+
output = search_related_upstream_tables(sql_stmt_str, args.table, args.depth)
|
|
141
143
|
sub_command_arg = "--upstream"
|
|
142
144
|
elif args.downstream:
|
|
143
|
-
output = search_related_downstream_tables(sql_stmt_str, args.table)
|
|
145
|
+
output = search_related_downstream_tables(sql_stmt_str, args.table, args.depth)
|
|
144
146
|
sub_command_arg = "--downstream"
|
|
145
147
|
elif args.all:
|
|
146
148
|
output = search_related_tables(sql_stmt_str, args.table)
|
|
@@ -161,7 +163,21 @@ def main():
|
|
|
161
163
|
print(f"open web page: {html_file_path}")
|
|
162
164
|
visualize_dag(get_all_dag(sql_stmt_str), template_type="dagre", filename=html_file_path)
|
|
163
165
|
return
|
|
164
|
-
|
|
166
|
+
|
|
165
167
|
elif args.command == "table-count":
|
|
166
|
-
|
|
167
|
-
|
|
168
|
+
table_count_lst = table_count(sql_stmt_str, args.table)
|
|
169
|
+
if args.output_format == "text":
|
|
170
|
+
for table, count in table_count_lst:
|
|
171
|
+
print(f"{table}: {count}")
|
|
172
|
+
|
|
173
|
+
elif args.output_format == "json":
|
|
174
|
+
result = {
|
|
175
|
+
"status": "ok",
|
|
176
|
+
"command": "table-count",
|
|
177
|
+
"data": table_count_lst,
|
|
178
|
+
"meta": {"table_count": len(table_count_lst)},
|
|
179
|
+
}
|
|
180
|
+
print(json.dumps(result, indent=2, ensure_ascii=False))
|
|
181
|
+
else:
|
|
182
|
+
print(f"Error: Not Supported output format: {args.output_format}")
|
|
183
|
+
sys.exit(1)
|
|
@@ -267,12 +267,13 @@ class DagGraph:
|
|
|
267
267
|
|
|
268
268
|
return False
|
|
269
269
|
|
|
270
|
-
def find_upstream(self, node: str) -> FindResult:
|
|
270
|
+
def find_upstream(self, node: str, depth: int | None = None) -> FindResult:
|
|
271
271
|
"""
|
|
272
272
|
查找所有上游依赖的边
|
|
273
273
|
|
|
274
274
|
Args:
|
|
275
275
|
node: 目标节点
|
|
276
|
+
depth: 最大深度,None 表示全链
|
|
276
277
|
|
|
277
278
|
Returns:
|
|
278
279
|
上游边的集合
|
|
@@ -280,12 +281,17 @@ class DagGraph:
|
|
|
280
281
|
if node not in self.__nodes:
|
|
281
282
|
return NodeNotFoundException(f"节点不存在:{node}")
|
|
282
283
|
|
|
283
|
-
queue = deque([node])
|
|
284
|
+
queue = deque([(node, 0)]) # (node, current_depth)
|
|
284
285
|
visited = set([node])
|
|
285
286
|
all_relations = []
|
|
286
287
|
|
|
287
288
|
while queue:
|
|
288
|
-
current = queue.popleft()
|
|
289
|
+
# current = queue.popleft()
|
|
290
|
+
current, current_depth = queue.popleft()
|
|
291
|
+
# 检查深度限制
|
|
292
|
+
if depth is not None and current_depth >= depth:
|
|
293
|
+
continue
|
|
294
|
+
|
|
289
295
|
# 使用反向邻接表直接查找上游节点,提升性能
|
|
290
296
|
predecessors = self.__reverse_adjacency_list.get(current, set())
|
|
291
297
|
for predecessor in predecessors:
|
|
@@ -293,15 +299,16 @@ class DagGraph:
|
|
|
293
299
|
all_relations.append(edge)
|
|
294
300
|
if predecessor not in visited:
|
|
295
301
|
visited.add(predecessor)
|
|
296
|
-
queue.append(predecessor)
|
|
302
|
+
queue.append((predecessor, current_depth + 1))
|
|
297
303
|
return DagGraph(edges=all_relations)
|
|
298
304
|
|
|
299
|
-
def find_downstream(self, node: str) -> FindResult:
|
|
305
|
+
def find_downstream(self, node: str, depth: int | None = None) -> FindResult:
|
|
300
306
|
"""
|
|
301
307
|
查找所有下游依赖的边
|
|
302
308
|
|
|
303
309
|
Args:
|
|
304
310
|
node: 起始节点
|
|
311
|
+
depth: 最大深度,None 表示全链
|
|
305
312
|
|
|
306
313
|
Returns:
|
|
307
314
|
下游边的集合
|
|
@@ -309,12 +316,17 @@ class DagGraph:
|
|
|
309
316
|
if node not in self.__nodes:
|
|
310
317
|
return NodeNotFoundException(f"节点不存在:{node}")
|
|
311
318
|
|
|
312
|
-
queue = deque([node])
|
|
319
|
+
queue = deque([(node, 0)]) # (node, current_depth)
|
|
313
320
|
visited = set([node])
|
|
314
321
|
all_relations = []
|
|
315
322
|
|
|
316
323
|
while queue:
|
|
317
|
-
current = queue.popleft()
|
|
324
|
+
current, current_depth = queue.popleft()
|
|
325
|
+
|
|
326
|
+
# 检查深度限制
|
|
327
|
+
if depth is not None and current_depth >= depth:
|
|
328
|
+
continue
|
|
329
|
+
|
|
318
330
|
# 使用邻接表直接查找下游节点,提升性能
|
|
319
331
|
neighbors = self.__adjacency_list.get(current, set())
|
|
320
332
|
for neighbor in neighbors:
|
|
@@ -322,7 +334,7 @@ class DagGraph:
|
|
|
322
334
|
all_relations.append(edge)
|
|
323
335
|
if neighbor not in visited:
|
|
324
336
|
visited.add(neighbor)
|
|
325
|
-
queue.append(neighbor)
|
|
337
|
+
queue.append((neighbor, current_depth + 1))
|
|
326
338
|
|
|
327
339
|
return DagGraph(edges=all_relations)
|
|
328
340
|
|
|
@@ -377,9 +389,7 @@ class DagGraph:
|
|
|
377
389
|
|
|
378
390
|
# 替换模板变量
|
|
379
391
|
html_content = template.safe_substitute(
|
|
380
|
-
title="DAG Visualization",
|
|
381
|
-
mermaid_content=mermaid_content,
|
|
382
|
-
lineage_data=lineage_data
|
|
392
|
+
title="DAG Visualization", mermaid_content=mermaid_content, lineage_data=lineage_data
|
|
383
393
|
)
|
|
384
394
|
|
|
385
395
|
return html_content
|
|
@@ -26,7 +26,7 @@ def test_get_all_root_tables():
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def test_search_related_upstream_tables():
|
|
29
|
-
a = utils.search_related_upstream_tables(sql_stmt_str, "
|
|
29
|
+
a = utils.search_related_upstream_tables(sql_stmt_str, "dws.xxx")
|
|
30
30
|
if isinstance(a, Tuple):
|
|
31
31
|
print(utils.list_command_text(a[0]))
|
|
32
32
|
else:
|
|
@@ -88,5 +88,5 @@ def test_search_related_root_tables():
|
|
|
88
88
|
|
|
89
89
|
def test_table_count():
|
|
90
90
|
print(utils.table_count(sql_stmt_str))
|
|
91
|
-
for table, count in utils.table_count(sql_stmt_str,
|
|
91
|
+
for table, count in utils.table_count(sql_stmt_str, "ods_hive.ods_order"):
|
|
92
92
|
print(f"{table}: {count}")
|
|
@@ -198,7 +198,7 @@ def search_related_root_tables(sql_stmt_str: str, target_table: str) -> SearchRe
|
|
|
198
198
|
return related_graph
|
|
199
199
|
|
|
200
200
|
|
|
201
|
-
def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
201
|
+
def search_related_upstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
202
202
|
"""
|
|
203
203
|
从目标表向上追溯,获取所有上游依赖表(包含所有中间表)
|
|
204
204
|
|
|
@@ -215,7 +215,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
|
|
|
215
215
|
(上游表列表, 上游依赖子图)
|
|
216
216
|
"""
|
|
217
217
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
218
|
-
related_graph = dg.find_upstream(target_table)
|
|
218
|
+
related_graph = dg.find_upstream(target_table, depth=depth)
|
|
219
219
|
if isinstance(related_graph, DagGraph):
|
|
220
220
|
if related_graph.empty:
|
|
221
221
|
return [], DagGraph()
|
|
@@ -230,7 +230,7 @@ def search_related_upstream_tables(sql_stmt_str: str, target_table: str) -> Sear
|
|
|
230
230
|
return related_graph
|
|
231
231
|
|
|
232
232
|
|
|
233
|
-
def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
233
|
+
def search_related_downstream_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
234
234
|
"""
|
|
235
235
|
从目标表向下追溯,获取所有下游依赖表(包含所有中间表)
|
|
236
236
|
|
|
@@ -247,7 +247,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
|
|
|
247
247
|
(下游表列表, 下游依赖子图)
|
|
248
248
|
"""
|
|
249
249
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
250
|
-
related_graph = dg.find_downstream(target_table)
|
|
250
|
+
related_graph = dg.find_downstream(target_table, depth=depth)
|
|
251
251
|
if isinstance(related_graph, DagGraph):
|
|
252
252
|
if related_graph.empty:
|
|
253
253
|
return [], DagGraph()
|
|
@@ -262,7 +262,7 @@ def search_related_downstream_tables(sql_stmt_str: str, target_table: str) -> Se
|
|
|
262
262
|
return related_graph
|
|
263
263
|
|
|
264
264
|
|
|
265
|
-
def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
265
|
+
def search_related_tables(sql_stmt_str: str, target_table: str, depth: int | None = None) -> SearchResult:
|
|
266
266
|
"""
|
|
267
267
|
从目标表双向追溯,获取所有相关表(上游+下游,不包含自身)
|
|
268
268
|
|
|
@@ -279,9 +279,9 @@ def search_related_tables(sql_stmt_str: str, target_table: str) -> SearchResult:
|
|
|
279
279
|
(相关表列表, 完整依赖子图)
|
|
280
280
|
"""
|
|
281
281
|
_, _, dg = __build_tables_and_graph(sql_stmt_str)
|
|
282
|
-
related_graph_upstream = dg.find_upstream(target_table)
|
|
282
|
+
related_graph_upstream = dg.find_upstream(target_table, depth=depth)
|
|
283
283
|
if isinstance(related_graph_upstream, DagGraph):
|
|
284
|
-
related_graph_downstream = dg.find_downstream(target_table)
|
|
284
|
+
related_graph_downstream = dg.find_downstream(target_table, depth=depth)
|
|
285
285
|
if isinstance(related_graph_downstream, DagGraph):
|
|
286
286
|
new_graph = related_graph_downstream.union(related_graph_upstream)
|
|
287
287
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|