sqlh 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlh/.DS_Store ADDED
Binary file
sqlh/__init__.py ADDED
@@ -0,0 +1,32 @@
1
"""Public API of the ``sqlh`` package (a mini SQL-lineage toolkit).

Re-exports the DAG graph type, the SQL helper functions and the
lineage query utilities so callers can import everything from ``sqlh``
directly.
"""

from .core.graph import DagGraph
from .core.helper import split_sql, trim_comment
from .utils import (
    get_all_leaf_tables,
    get_all_root_tables,
    get_all_tables,
    read_sql_from_directory,
    search_command_json,
    search_related_downstream_tables,
    search_related_root_tables,
    search_related_tables,
    search_related_upstream_tables,
    visualize_dag,
)

# Package version; also reported by the CLI's -v/--version flag.
__version__ = "0.2.3"

# Names exported by `from sqlh import *`.
__all__ = [
    "split_sql",
    "trim_comment",
    "DagGraph",
    "read_sql_from_directory",
    "get_all_tables",
    "get_all_root_tables",
    "get_all_leaf_tables",
    "search_related_root_tables",
    "search_related_upstream_tables",
    "search_related_downstream_tables",
    "search_related_tables",
    "search_command_json",
    "visualize_dag",
]
sqlh/cli.py ADDED
@@ -0,0 +1,153 @@
1
+ import argparse
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from sqlh import __version__
6
+
7
+ from .utils import (
8
+ get_all_dag,
9
+ get_all_leaf_tables,
10
+ get_all_root_tables,
11
+ get_all_tables,
12
+ list_command_json,
13
+ list_command_text,
14
+ read_sql_from_directory,
15
+ search_command_json,
16
+ search_command_text,
17
+ search_related_downstream_tables,
18
+ search_related_root_tables,
19
+ search_related_tables,
20
+ search_related_upstream_tables,
21
+ visualize_dag,
22
+ )
23
+
24
+
25
+ def _create_parent_parser():
26
+ """创建包含共享参数的父解析器"""
27
+ parent_parser = argparse.ArgumentParser(add_help=False)
28
+
29
+ # 共享参数: 所有子命令都支持 -s/--sql 或 -p/--path
30
+ sql_or_path = parent_parser.add_mutually_exclusive_group(required=True)
31
+ sql_or_path.add_argument("-s", "--sql", dest="sql", help="sql statement")
32
+ sql_or_path.add_argument("-p", "--path", dest="path", help="sql file or directory path")
33
+
34
+ # 共享参数: 所有子命令都支持输出格式
35
+ parent_parser.add_argument(
36
+ "-f",
37
+ "--output-format",
38
+ choices=["json", "text", "web", "html"],
39
+ default="json",
40
+ help="output format",
41
+ )
42
+
43
+ return parent_parser
44
+
45
+
46
def arg_parse():
    """Define the CLI and parse ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace`` holding the chosen sub-command
        (``command``) and its options.
    """
    root = argparse.ArgumentParser(usage="%(prog)s [OPTIONS] <COMMAND>", description="mini-sqllineage")
    root.add_argument("-v", "--version", action="version", version=__version__)

    # Options shared by every sub-command (-s/-p source, -f format).
    shared = _create_parent_parser()

    commands = root.add_subparsers(dest="command", required=True)

    # `list`: enumerate tables found in the SQL.
    list_cmd = commands.add_parser(
        "list",
        parents=[shared],
        help="list all tables / root tables / leaf tables",
        add_help=False,
    )
    which_tables = list_cmd.add_mutually_exclusive_group(required=True)
    which_tables.add_argument("--all", action="store_true", help="list all tables")
    which_tables.add_argument("--root", action="store_true", help="list root tables (tables with no dependencies)")
    which_tables.add_argument("--leaf", action="store_true", help="list leaf tables (tables not used by others)")
    list_cmd.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")

    # `search`: explore lineage around one table.
    search_cmd = commands.add_parser(
        "search",
        parents=[shared],
        help="search table relationships",
        add_help=False,
    )
    direction = search_cmd.add_mutually_exclusive_group(required=True)
    direction.add_argument("--root", action="store_true", help="search root tables of the specified table")
    direction.add_argument("--upstream", action="store_true", help="search upstream tables (dependencies)")
    direction.add_argument("--downstream", action="store_true", help="search downstream tables (dependents)")
    direction.add_argument("--all", action="store_true", help="search both upstream and downstream tables")
    search_cmd.add_argument("-t", "--table", help="table name to search", required=True)
    search_cmd.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")

    # `web`: render the full DAG as an HTML page.
    web_cmd = commands.add_parser(
        "web",
        parents=[shared],
        help="start web server for visualization",
        add_help=False,
    )
    web_cmd.add_argument("--html-path", help="html file path for visualization", default=".")
    web_cmd.add_argument("-h", "--help", action="help", default=argparse.SUPPRESS, help="show this help message")

    return root.parse_args()
95
+
96
+
97
def _run_list(args, sql_stmt_str: str) -> None:
    """Handle the `list` sub-command: print all / root / leaf tables."""
    if args.all:
        output = get_all_tables(sql_stmt_str)
        sub_command_arg = "--all"
    elif args.root:
        output = get_all_root_tables(sql_stmt_str)
        sub_command_arg = "--root"
    else:
        # `--leaf`; the required mutually-exclusive group guarantees one flag,
        # and a final `else` keeps `output` bound even if flags change later.
        output = get_all_leaf_tables(sql_stmt_str)
        sub_command_arg = "--leaf"

    if args.output_format == "json":
        print(list_command_json(output, sub_command_arg))
    else:
        # "text", "web" and "html" all fall back to plain text for `list`.
        print(list_command_text(output))


def _run_search(args, sql_stmt_str: str) -> None:
    """Handle the `search` sub-command: print lineage around one table."""
    if args.root:
        output = search_related_root_tables(sql_stmt_str, args.table)
        sub_command_arg = "--root"
    elif args.upstream:
        output = search_related_upstream_tables(sql_stmt_str, args.table)
        sub_command_arg = "--upstream"
    elif args.downstream:
        output = search_related_downstream_tables(sql_stmt_str, args.table)
        sub_command_arg = "--downstream"
    else:
        # `--all` (guaranteed by the required mutually-exclusive group).
        output = search_related_tables(sql_stmt_str, args.table)
        sub_command_arg = "--all"

    if args.output_format == "json":
        print(search_command_json(output, sub_command_arg))
    elif args.output_format in ["web", "html"]:
        # Presumably search_related_* returns (tables, dag) when a DAG is
        # available -- TODO confirm against sqlh.utils.
        if isinstance(output, tuple):
            visualize_dag(output[1], template_type="mermaid", filename="lineage_mermaid.html")
        else:
            print(output)
    elif args.output_format == "text":
        print(search_command_text(output))


def _run_web(args, sql_stmt_str: str) -> None:
    """Handle the `web` sub-command: render the full DAG to an HTML file."""
    html_file_path = Path(args.html_path) / "lineage_dagre.html"
    print(f"open web page: {html_file_path}")
    visualize_dag(get_all_dag(sql_stmt_str), template_type="dagre", filename=html_file_path)


def main():
    """CLI entry point: load the SQL source, then dispatch the sub-command.

    Exits with status 1 when the given path does not exist.
    """
    args = arg_parse()

    if args.sql:
        sql_stmt_str = args.sql
    else:
        try:
            sql_stmt_str = read_sql_from_directory(args.path)
        except FileNotFoundError:
            # BUGFIX: report errors on stderr so piped stdout stays clean.
            print(f"Error: File not found: {args.path}", file=sys.stderr)
            sys.exit(1)

    if args.command == "list":
        _run_list(args, sql_stmt_str)
    elif args.command == "search":
        _run_search(args, sql_stmt_str)
    elif args.command == "web":
        _run_web(args, sql_stmt_str)
sqlh/core/graph.py ADDED
@@ -0,0 +1,385 @@
1
+ """
2
+ DAG (Directed Acyclic Graph) implementation for table dependency tracking.
3
+
4
+ This module provides a graph data structure for representing and analyzing
5
+ table dependencies in SQL queries, with support for:
6
+ - Node and edge management
7
+ - Upstream/downstream traversal
8
+ - Cycle detection
9
+ - Mermaid.js format visualization
10
+
11
+ Example:
12
+     from sqlh.core.graph import DagGraph
13
+
14
+ dg = DagGraph()
15
+ dg.add_edge("table_a", "table_b")
16
+ dg.add_edge("table_b", "table_c")
17
+ dg.print_all_edges_to_mermaid()
18
+ """
19
+
20
+ from collections import deque
21
+ from pathlib import Path
22
+ from string import Template
23
+ from typing import Literal, Union
24
+
25
+
26
class NodeNotFoundException(Exception):
    """Raised when an operation references a node absent from the graph."""
30
+
31
+
32
class NodeExistsException(Exception):
    """Raised when adding a node that is already present in the graph."""
36
+
37
+
38
class CycleDetectedException(Exception):
    """Raised when an operation would introduce (or has found) a cycle."""
42
+
43
+
44
# Result of the find_upstream/find_downstream traversals: a sub-graph on
# success, or a NodeNotFoundException *instance* (returned, not raised)
# when the start node is missing.
FindResult = Union["DagGraph", NodeNotFoundException]
45
+
46
+
47
+ class DagGraph:
48
+ def __init__(self, nodes: list[str] | None = None, edges: list[tuple[str, str]] | None = None) -> None:
49
+ """
50
+ 初始化DAG图
51
+
52
+ Args:
53
+ nodes: 初始节点列表,默认为空列表
54
+ """
55
+ if nodes is None:
56
+ nodes = []
57
+ self.__nodes = set(nodes) # 使用集合提升查找效率
58
+ if edges is None:
59
+ # edges = []
60
+ self.__edges = set() # 使用集合存储边
61
+ else:
62
+ self.__edges = set(edges) # 使用集合存储边
63
+ # self.__nodes.update({node for edge in edges for node in edge}) # 从边中提取节点并添加到节点集合
64
+ for edge in edges:
65
+ self.__nodes.add(edge[0])
66
+ self.__nodes.add(edge[1])
67
+
68
+ self.__adjacency_list: dict[str, set[str]] = {} # 邻接表,用于快速遍历(下游)
69
+ self.__reverse_adjacency_list: dict[str, set[str]] = {} # 反向邻接表,用于上游查找
70
+ for node in nodes:
71
+ self.__adjacency_list[node] = set()
72
+ self.__reverse_adjacency_list[node] = set()
73
+
74
+ def add_node(self, node: str) -> None:
75
+ """
76
+ 添加节点
77
+
78
+ Args:
79
+ node: 节点对象
80
+
81
+ Raises:
82
+ NodeExistsException: 节点已存在
83
+ """
84
+ if node in self.__nodes:
85
+ raise NodeExistsException(f"节点已存在:{node}")
86
+ self.__nodes.add(node)
87
+ if node not in self.__adjacency_list:
88
+ self.__adjacency_list[node] = set()
89
+ if node not in self.__reverse_adjacency_list:
90
+ self.__reverse_adjacency_list[node] = set()
91
+
92
+ def remove_node(self, node: str) -> None:
93
+ """
94
+ 删除节点及其相关边
95
+
96
+ Args:
97
+ node: 节点对象
98
+
99
+ Raises:
100
+ NodeNotFoundException: 节点不存在
101
+ """
102
+ if node not in self.__nodes:
103
+ raise NodeNotFoundException(f"节点不存在:{node}")
104
+
105
+ # 删除节点
106
+ self.__nodes.discard(node)
107
+
108
+ # 删除与该节点相关的所有边
109
+ edges_to_remove = {(f, t) for f, t in self.__edges if f == node or t == node}
110
+ self.__edges -= edges_to_remove
111
+
112
+ # 更新邻接表
113
+ if node in self.__adjacency_list:
114
+ del self.__adjacency_list[node]
115
+ for adjacent in self.__adjacency_list:
116
+ self.__adjacency_list[adjacent].discard(node)
117
+
118
+ # 更新反向邻接表
119
+ if node in self.__reverse_adjacency_list:
120
+ del self.__reverse_adjacency_list[node]
121
+ for adjacent in self.__reverse_adjacency_list:
122
+ self.__reverse_adjacency_list[adjacent].discard(node)
123
+
124
+ def add_edge(self, _from: str, _to: str) -> None:
125
+ """
126
+ 添加边,如果节点不存在则自动添加
127
+
128
+ Args:
129
+ _from: 起始节点
130
+ _to: 目标节点
131
+
132
+ Raises:
133
+ CycleDetectedException: 如果添加该边会形成环
134
+ """
135
+ # 自动添加不存在的节点
136
+ if _from not in self.__nodes:
137
+ self.add_node(_from)
138
+ if _to not in self.__nodes:
139
+ self.add_node(_to)
140
+
141
+ # 检查是否会形成环
142
+ # if self.__would_create_cycle(_from.name, _to.name):
143
+ # raise CycleDetectedException(f"添加边 {_from} -> {_to} 会形成环")
144
+
145
+ # 添加边
146
+ edge = (_from, _to)
147
+ self.__edges.add(edge)
148
+ self.__adjacency_list[_from].add(_to)
149
+ self.__reverse_adjacency_list[_to].add(_from)
150
+
151
+ def remove_edge(self, _from: str, _to: str) -> None:
152
+ """
153
+ 删除边
154
+
155
+ Args:
156
+ _from: 起始节点
157
+ _to: 目标节点
158
+
159
+ Raises:
160
+ NodeNotFoundException: 节点不存在
161
+ """
162
+ if _from not in self.__nodes:
163
+ raise NodeNotFoundException(f"节点不存在:{_from}")
164
+ if _to not in self.__nodes:
165
+ raise NodeNotFoundException(f"节点不存在:{_to}")
166
+
167
+ # edge = (_from, _to)
168
+ self.__edges.discard((_from, _to))
169
+ if _from in self.__adjacency_list:
170
+ self.__adjacency_list[_from].discard(_to)
171
+ if _to in self.__reverse_adjacency_list:
172
+ self.__reverse_adjacency_list[_to].discard(_from)
173
+
174
+ def get_nodes(self) -> list[str]:
175
+ """
176
+ 获取所有节点
177
+ Returns:
178
+ 节点列表
179
+ """
180
+ return sorted(list(self.__nodes))
181
+
182
+ def get_edges(self) -> list[tuple[str, str]]:
183
+ """
184
+ 获取所有边(去重)
185
+
186
+ Returns:
187
+ 边列表,每个元素为 (from, to) 元组
188
+ """
189
+ return sorted(list(self.__edges))
190
+
191
+ def union(self, other: "DagGraph") -> "DagGraph":
192
+ """
193
+ 合并两个DAG图
194
+
195
+ Args:
196
+ other: 另一个DAG图
197
+
198
+ Returns:
199
+ 合并后的DAG图
200
+ """
201
+ new_nodes = self.get_nodes() + other.get_nodes()
202
+ new_edges = self.get_edges() + other.get_edges()
203
+ return DagGraph(new_nodes, new_edges)
204
+
205
+ @property
206
+ def empty(self) -> bool:
207
+ return len(self.__nodes) == 0
208
+
209
+ def __would_create_cycle(self, from_node: str, to_node: str) -> bool:
210
+ """
211
+ 检查添加边是否会形成环
212
+
213
+ Args:
214
+ from_node: 起始节点
215
+ to_node: 目标节点
216
+
217
+ Returns:
218
+ True 如果会形成环
219
+ """
220
+ if from_node == to_node:
221
+ return True
222
+
223
+ # 从 to_node 开始DFS,看能否到达 from_node
224
+ visited = set()
225
+ stack = [to_node]
226
+
227
+ while stack:
228
+ current = stack.pop()
229
+ if current == from_node:
230
+ return True
231
+ if current not in visited:
232
+ visited.add(current)
233
+ # 获取当前节点的所有下游节点
234
+ if current in self.__adjacency_list:
235
+ stack.extend(self.__adjacency_list[current])
236
+
237
+ return False
238
+
239
+ def has_cycle(self) -> bool:
240
+ """
241
+ 检测图中是否存在环
242
+
243
+ Returns:
244
+ True 如果存在环
245
+ """
246
+ visited = set()
247
+ rec_stack = set()
248
+
249
+ def dfs(node: str) -> bool:
250
+ visited.add(node)
251
+ rec_stack.add(node)
252
+
253
+ for neighbor in self.__adjacency_list.get(node, set()):
254
+ if neighbor not in visited:
255
+ if dfs(neighbor):
256
+ return True
257
+ elif neighbor in rec_stack:
258
+ return True
259
+
260
+ rec_stack.remove(node)
261
+ return False
262
+
263
+ for node in self.__nodes:
264
+ if node not in visited:
265
+ if dfs(node):
266
+ return True
267
+
268
+ return False
269
+
270
+ def find_upstream(self, node: str) -> FindResult:
271
+ """
272
+ 查找所有上游依赖的边
273
+
274
+ Args:
275
+ node: 目标节点
276
+
277
+ Returns:
278
+ 上游边的集合
279
+ """
280
+ if node not in self.__nodes:
281
+ return NodeNotFoundException(f"节点不存在:{node}")
282
+
283
+ queue = deque([node])
284
+ visited = set([node])
285
+ all_relations = []
286
+
287
+ while queue:
288
+ current = queue.popleft()
289
+ # 使用反向邻接表直接查找上游节点,提升性能
290
+ predecessors = self.__reverse_adjacency_list.get(current, set())
291
+ for predecessor in predecessors:
292
+ edge = (predecessor, current)
293
+ all_relations.append(edge)
294
+ if predecessor not in visited:
295
+ visited.add(predecessor)
296
+ queue.append(predecessor)
297
+ return DagGraph(edges=all_relations)
298
+
299
+ def find_downstream(self, node: str) -> FindResult:
300
+ """
301
+ 查找所有下游依赖的边
302
+
303
+ Args:
304
+ node: 起始节点
305
+
306
+ Returns:
307
+ 下游边的集合
308
+ """
309
+ if node not in self.__nodes:
310
+ return NodeNotFoundException(f"节点不存在:{node}")
311
+
312
+ queue = deque([node])
313
+ visited = set([node])
314
+ all_relations = []
315
+
316
+ while queue:
317
+ current = queue.popleft()
318
+ # 使用邻接表直接查找下游节点,提升性能
319
+ neighbors = self.__adjacency_list.get(current, set())
320
+ for neighbor in neighbors:
321
+ edge = (current, neighbor)
322
+ all_relations.append(edge)
323
+ if neighbor not in visited:
324
+ visited.add(neighbor)
325
+ queue.append(neighbor)
326
+
327
+ return DagGraph(edges=all_relations)
328
+
329
+ def to_mermaid(self, direction="LR") -> str:
330
+ """
331
+ 转换为 Mermaid 格式字符串
332
+
333
+ Returns:
334
+ Mermaid格式的图描述字符串
335
+ """
336
+ if not self.__nodes:
337
+ return ""
338
+ else:
339
+ mermaid_str = f"graph {direction}"
340
+ for _from, _to in self.__edges:
341
+ mermaid_str += f"\n {_from} --> {_to}"
342
+ return mermaid_str
343
+
344
+ def to_dict(self) -> dict:
345
+ """
346
+ 转换为字典格式
347
+
348
+ Returns:
349
+ 字典格式的图描述
350
+ """
351
+ nodes = [{"id": node, "label": node.split(".")[:-1]} for node in self.__nodes]
352
+ edges = [{"source": _from, "target": _to} for _from, _to in self.__edges]
353
+ return {"nodes": nodes, "edges": edges, "node_count": len(nodes)}
354
+
355
+ def to_html(self, template_type: Literal["mermaid", "dagre"] = "mermaid") -> str:
356
+ """
357
+ 生成包含Mermaid.js可视化的HTML代码
358
+
359
+ Args:
360
+ edges: 可选的边集合,如果为None则使用所有边
361
+
362
+ Returns:
363
+ 包含Mermaid.js可视化的HTML字符串
364
+ """
365
+ mermaid_content = self.to_mermaid()
366
+ lineage_data = self.to_dict()
367
+ # 读取HTML模板文件
368
+ if template_type == "mermaid":
369
+ template_path = Path(__file__).parent.parent / "static" / "mermaid_template.html"
370
+ elif template_type == "dagre":
371
+ template_path = Path(__file__).parent.parent / "static" / "dagre_template.html"
372
+ else:
373
+ raise ValueError(f"Unknown template type: {template_type}")
374
+
375
+ with open(template_path, encoding="utf-8") as f:
376
+ template = Template(f.read())
377
+
378
+ # 替换模板变量
379
+ html_content = template.safe_substitute(
380
+ title="DAG Visualization",
381
+ mermaid_content=mermaid_content,
382
+ lineage_data=lineage_data
383
+ )
384
+
385
+ return html_content