PyPI - sql-dag-flow - Versions diffs - 0.1.0__tar.gz - Mend

sql-dag-flow 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

sql_dag_flow-0.1.0/LICENSE +21 -0
sql_dag_flow-0.1.0/MANIFEST.in +1 -0
sql_dag_flow-0.1.0/PKG-INFO +137 -0
sql_dag_flow-0.1.0/README.md +112 -0
sql_dag_flow-0.1.0/pyproject.toml +42 -0
sql_dag_flow-0.1.0/setup.cfg +4 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/__init__.py +0 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/main.py +203 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/parser.py +290 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/static/assets/index-BvR3xTHQ.js +47 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/static/assets/index-h7gxR8yA.css +1 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/static/index.html +14 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/static/vite.svg +1 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/test_api_endpoints.py +44 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/test_parser.py +29 -0
sql_dag_flow-0.1.0/src/sql_dag_flow/verify_counts.py +21 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/PKG-INFO +137 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/SOURCES.txt +20 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/dependency_links.txt +1 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/entry_points.txt +2 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/requires.txt +5 -0
sql_dag_flow-0.1.0/src/sql_dag_flow.egg-info/top_level.txt +1 -0

sql_dag_flow-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Flavio Sandoval (@dsandovalflavio)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

sql_dag_flow-0.1.0/MANIFEST.in ADDED Viewed

	@@ -0,0 +1 @@
1	+ recursive-include src/sql_dag_flow/static *

sql_dag_flow-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,137 @@
+Metadata-Version: 2.4
+Name: sql-dag-flow
+Version: 0.1.0
+Summary: A sophisticated SQL lineage visualization tool for Medallion Architectures.
+Author-email: Flavio Sandoval <dsandovalflavio@gmail.com>
+License: MIT
+Keywords: sql,lineage,dag,visualization,medallion-architecture,data-engineering
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: fastapi
+Requires-Dist: uvicorn
+Requires-Dist: sqlglot
+Requires-Dist: networkx
+Requires-Dist: pydantic
+Dynamic: license-file
+# SQL DAG Flow
+**SQL DAG Flow** is a powerful, open-source visualization tool designed to automatically map and visualize the lineage of your SQL data pipelines.
+Built specifically for modern data stacks using the **Medallion Architecture** (Bronze, Silver, Gold), it parses your SQL files to generate an interactive, dependency-aware Directed Acyclic Graph (DAG).
+![SQL DAG Flow Screenshot](/images/sql-architecture-logo.png)
+## 🚀 Key Features
+*   **Automatic Parsing & Visualization**: Recursively scans your project folders to find `.sql` files and detect dependencies (`FROM`, `JOIN`, `CTE`s) using `sqlglot`.
+*   **Medallion Architecture Support**: Automatically categorizes and colors nodes based on folder structure:
+    *   🟤 **Bronze**: Raw ingestion layers.
+    *   ⚪ **Silver**: Cleaned and conformed data.
+    *   🟡 **Gold**: Business-level aggregates.
+*   **Smart Folder Selection**:
+    *   **Selective Exploration**: Choose specific subfolders to analyze using an interactive tree view.
+    *   **Deep Filtering**: Focus only on relevant parts of your pipeline.
+*   **Advanced Organization**:
+    *   **Selection Toolbar**: Multi-select nodes and align them horizontally/vertically.
+    *   **Node Hiding**: Hide specific nodes or entire trees to declutter the view.
+    *   **Auto Layout**: Automatically arrange nodes using Dagre layout engine.
+*   **Configuration Management**:
+    *   **Save & Load**: Persist your layouts, hidden nodes, and viewport settings to JSON.
+    *   **Workspaces**: Manage multiple project configurations.
+*   **Rich Metadata**:
+    *   **Details Panel**: View full SQL content and schema details.
+    *   **Annotations**: Add sticky notes with **Markdown support**, resize them, and create visual groups.
+*   **Visual Cues**:
+    *   **Solid Border**: Indicates a Table.
+    *   **Dashed Border**: Indicates a View (auto-detected).
+*   **Premium UI/UX**:
+    *   **Dark/Light Modes**: Themed for your preference.
+    *   **Export**: Save as high-resolution **PNG** or vector **SVG**.
+## 🌍 Supported Dialects
+Powered by `sqlglot`, supporting:
+*   **BigQuery** (Default)
+*   **Snowflake**
+*   **PostgreSQL**
+*   **Spark / Databricks**
+*   **Amazon Redshift**
+*   **DuckDB**
+*   **MySQL**
+*   ...and more.
+## 📦 Installation
+Install easily via `pip`:
+```bash
+pip install sql-dag-flow
+```
+## ▶️ Usage
+### 1. Command Line Interface (CLI)
+You can run the tool directly from your terminal:
+```bash
+# Analyze the current directory
+sql-dag-flow
+# Analyze a specific SQL project
+sql-dag-flow /path/to/my/dbt_project
+```
+### 2. Python API
+Integrate it into your Python scripts or notebooks:
+```python
+from sql_dag_flow.main import start
+# Start the server and open the browser
+start(directory="./my_sql_project")
+```
+## 📂 Project Structure Expectations
+SQL DAG Flow is opinionated but flexible. It looks for standard Medallion Architecture naming conventions to assign colors:
+*   **Bronze Layer**: Any folder named `bronze`, `raw`, `landing`, or `staging`.
+*   **Silver Layer**: Any folder named `silver`, `intermediate`, or `conformed`.
+*   **Gold Layer**: Any folder named `gold`, `mart`, `serving`, or `presentation`.
+*   **Other**: Any other folder is categorized as "Other" (Gray).
+## 🛠️ Configuration & Customization
+### Settings
+Click the **Settings (Gear)** icon in the bottom toolbar to:
+*   **Change SQL Dialect**: Ensure your specific SQL syntax is parsed correctly.
+*   **Toggle Node Style**: Switch between "Full" (colored body) and "Minimal" (colored border) styles.
+*   **Change Palette**: Switch between Standard, Vivid, and Pastel color palettes.
+### Saving Layouts
+Your graph layout (positions, hidden nodes) is **not** permanent by default. To save your work:
+1.  Click **Save** in the top bar.
+2.  Choose a filename (e.g., `marketing_flow.json`).
+3.  Next time, click **Load** to restore that exact view.
+## 🤝 Contributing
+Contributions are welcome!
+1.  Fork the repository.
+2.  Create a feature branch.
+3.  Submit a Pull Request.
+---
+*Created by [Flavio Sandoval](https://github.com/dsandovalflavio)*

sql_dag_flow-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,112 @@
+# SQL DAG Flow
+**SQL DAG Flow** is a powerful, open-source visualization tool designed to automatically map and visualize the lineage of your SQL data pipelines.
+Built specifically for modern data stacks using the **Medallion Architecture** (Bronze, Silver, Gold), it parses your SQL files to generate an interactive, dependency-aware Directed Acyclic Graph (DAG).
+![SQL DAG Flow Screenshot](/images/sql-architecture-logo.png)
+## 🚀 Key Features
+*   **Automatic Parsing & Visualization**: Recursively scans your project folders to find `.sql` files and detect dependencies (`FROM`, `JOIN`, `CTE`s) using `sqlglot`.
+*   **Medallion Architecture Support**: Automatically categorizes and colors nodes based on folder structure:
+    *   🟤 **Bronze**: Raw ingestion layers.
+    *   ⚪ **Silver**: Cleaned and conformed data.
+    *   🟡 **Gold**: Business-level aggregates.
+*   **Smart Folder Selection**:
+    *   **Selective Exploration**: Choose specific subfolders to analyze using an interactive tree view.
+    *   **Deep Filtering**: Focus only on relevant parts of your pipeline.
+*   **Advanced Organization**:
+    *   **Selection Toolbar**: Multi-select nodes and align them horizontally/vertically.
+    *   **Node Hiding**: Hide specific nodes or entire trees to declutter the view.
+    *   **Auto Layout**: Automatically arrange nodes using Dagre layout engine.
+*   **Configuration Management**:
+    *   **Save & Load**: Persist your layouts, hidden nodes, and viewport settings to JSON.
+    *   **Workspaces**: Manage multiple project configurations.
+*   **Rich Metadata**:
+    *   **Details Panel**: View full SQL content and schema details.
+    *   **Annotations**: Add sticky notes with **Markdown support**, resize them, and create visual groups.
+*   **Visual Cues**:
+    *   **Solid Border**: Indicates a Table.
+    *   **Dashed Border**: Indicates a View (auto-detected).
+*   **Premium UI/UX**:
+    *   **Dark/Light Modes**: Themed for your preference.
+    *   **Export**: Save as high-resolution **PNG** or vector **SVG**.
+## 🌍 Supported Dialects
+Powered by `sqlglot`, supporting:
+*   **BigQuery** (Default)
+*   **Snowflake**
+*   **PostgreSQL**
+*   **Spark / Databricks**
+*   **Amazon Redshift**
+*   **DuckDB**
+*   **MySQL**
+*   ...and more.
+## 📦 Installation
+Install easily via `pip`:
+```bash
+pip install sql-dag-flow
+```
+## ▶️ Usage
+### 1. Command Line Interface (CLI)
+You can run the tool directly from your terminal:
+```bash
+# Analyze the current directory
+sql-dag-flow
+# Analyze a specific SQL project
+sql-dag-flow /path/to/my/dbt_project
+```
+### 2. Python API
+Integrate it into your Python scripts or notebooks:
+```python
+from sql_dag_flow.main import start
+# Start the server and open the browser
+start(directory="./my_sql_project")
+```
+## 📂 Project Structure Expectations
+SQL DAG Flow is opinionated but flexible. It looks for standard Medallion Architecture naming conventions to assign colors:
+*   **Bronze Layer**: Any folder named `bronze`, `raw`, `landing`, or `staging`.
+*   **Silver Layer**: Any folder named `silver`, `intermediate`, or `conformed`.
+*   **Gold Layer**: Any folder named `gold`, `mart`, `serving`, or `presentation`.
+*   **Other**: Any other folder is categorized as "Other" (Gray).
+## 🛠️ Configuration & Customization
+### Settings
+Click the **Settings (Gear)** icon in the bottom toolbar to:
+*   **Change SQL Dialect**: Ensure your specific SQL syntax is parsed correctly.
+*   **Toggle Node Style**: Switch between "Full" (colored body) and "Minimal" (colored border) styles.
+*   **Change Palette**: Switch between Standard, Vivid, and Pastel color palettes.
+### Saving Layouts
+Your graph layout (positions, hidden nodes) is **not** permanent by default. To save your work:
+1.  Click **Save** in the top bar.
+2.  Choose a filename (e.g., `marketing_flow.json`).
+3.  Next time, click **Load** to restore that exact view.
+## 🤝 Contributing
+Contributions are welcome!
+1.  Fork the repository.
+2.  Create a feature branch.
+3.  Submit a Pull Request.
+---
+*Created by [Flavio Sandoval](https://github.com/dsandovalflavio)*

sql_dag_flow-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,42 @@
+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "sql-dag-flow"
+version = "0.1.0"
+description = "A sophisticated SQL lineage visualization tool for Medallion Architectures."
+readme = "README.md"
+requires-python = ">=3.8"
+license = {text = "MIT"}
+authors = [
+    {name = "Flavio Sandoval", email = "dsandovalflavio@gmail.com"}
+]
+keywords = ["sql", "lineage", "dag", "visualization", "medallion-architecture", "data-engineering"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+]
+dependencies = [
+    "fastapi",
+    "uvicorn",
+    "sqlglot",
+    "networkx",
+    "pydantic"
+]
+[project.scripts]
+sql-dag-flow = "sql_dag_flow.main:start"
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["sql_dag_flow*"]
+[tool.setuptools.package-data]
+sql_dag_flow = ["static/**/*"]

sql_dag_flow-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

sql_dag_flow-0.1.0/src/sql_dag_flow/__init__.py ADDED Viewed

File without changes

sql_dag_flow-0.1.0/src/sql_dag_flow/main.py ADDED Viewed

@@ -0,0 +1,203 @@
+from fastapi import FastAPI, HTTPException, Body
+from pydantic import BaseModel
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+import uvicorn
+import os
+import sys
+import json
+import webbrowser
+import threading
+import time
+from .parser import parse_sql_files, build_graph
+# Single FastAPI application instance; every route in this module is registered on it.
+app = FastAPI()
+# Enable CORS for frontend
+# NOTE(review): every origin, method and header is allowed and credentials are
+# enabled, so any web page can call these endpoints (including POST /save) while
+# the server is running. Consider restricting allow_origins to the UI's origin.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Package structure
+# __file__ is inside src/sql_dag_flow/main.py
+# BASE_DIR is the directory containing this module; the bundled frontend is
+# looked up in its "static" subdirectory.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+STATIC_DIR = os.path.join(BASE_DIR, "static")
+# Global state
+# Directory scanned for SQL files; reassigned by start() and by POST /config/path.
+CURRENT_DIRECTORY = os.getcwd() # Default, updated by start()
+# Default diagram filename. The endpoints in this module repeat the literal
+# instead of reading this constant.
+DIAGRAM_FILE = "sql_diagram.json"
+@app.get("/graph")
+def get_graph(dialect: str = "bigquery"):
+    """Parses SQL files in the current directory and returns graph data."""
+    if not os.path.exists(CURRENT_DIRECTORY):
+        return {"nodes": [], "edges": [], "error": "Directory not found"}
+    tables = parse_sql_files(CURRENT_DIRECTORY, dialect=dialect)
+    nodes, edges = build_graph(tables)
+    return {"nodes": nodes, "edges": edges}
+@app.post("/config/path")
+def set_path(path_data: dict = Body(...)):
+    """Updates the directory to scan."""
+    global CURRENT_DIRECTORY
+    path = path_data.get("path")
+    # Basic validation
+    if not path or not os.path.exists(path):
+        raise HTTPException(status_code=400, detail="Directory does not exist")
+    CURRENT_DIRECTORY = path
+    return {"message": "Path updated", "path": CURRENT_DIRECTORY}
+@app.post("/scan/folders")
+def scan_folders(path_data: dict = Body(...)):
+    """Scans a directory and returns all subfolders (recursive, relative paths)."""
+    path = path_data.get("path")
+    if not path or not os.path.exists(path):
+         raise HTTPException(status_code=400, detail="Directory does not exist")
+    try:
+        subfolders = []
+        # Walk the directory tree
+        for root, dirs, files in os.walk(path):
+            # Skip hidden folders
+            dirs[:] = [d for d in dirs if not d.startswith('.')]
+            for d in dirs:
+                # Create relative path from the root path
+                full_path = os.path.join(root, d)
+                rel_path = os.path.relpath(full_path, path)
+                # Normalize separators to forward slashes for consistency
+                rel_path = rel_path.replace(os.sep, '/')
+                subfolders.append(rel_path)
+        # Sort for better UX
+        subfolders.sort()
+        return {"folders": subfolders}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/graph/filtered")
+def get_filtered_graph(data: dict = Body(...)):
+    """Parses SQL files with subfolder filtering."""
+    if not os.path.exists(CURRENT_DIRECTORY):
+        return {"nodes": [], "edges": [], "error": "Directory not found"}
+    subfolders = data.get("subfolders") # List of strings or None
+    dialect = data.get("dialect", "bigquery")
+    tables = parse_sql_files(CURRENT_DIRECTORY, allowed_subfolders=subfolders, dialect=dialect)
+    nodes, edges = build_graph(tables)
+    return {"nodes": nodes, "edges": edges}
+@app.get("/config/path")
+def get_path():
+    return {"path": CURRENT_DIRECTORY}
+class SaveRequest(BaseModel):
+    """Request body accepted by POST /save.
+
+    save_graph() writes nodes, edges, viewport and metadata verbatim into the
+    JSON file; the element schemas are not validated here.
+    """
+    # Diagram nodes as sent by the client (opaque to the server).
+    nodes: list
+    # Diagram edges as sent by the client (opaque to the server).
+    edges: list
+    # Viewport state; presumably {"x", "y", "zoom"} like the default returned
+    # by load_graph() — TODO confirm against the frontend.
+    viewport: dict
+    # Free-form metadata; save_graph() reads its optional "path" entry as the
+    # directory to write into.
+    metadata: dict
+    filename: str = "sql_diagram.json" # Default filename
+@app.post("/save")
+def save_graph(request: SaveRequest):
+    try:
+        # Use the path from metadata if available, otherwise default
+        path = request.metadata.get("path", ".")
+        if not os.path.isabs(path):
+             path = os.path.abspath(path)
+        filepath = os.path.join(path, request.filename)
+        data = {
+            "nodes": request.nodes,
+            "edges": request.edges,
+            "viewport": request.viewport,
+            "metadata": request.metadata
+        }
+        with open(filepath, "w") as f:
+            json.dump(data, f, indent=4)
+        return {"message": f"Graph saved successfully to {filepath}"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/load")
+def load_graph(path: str = ".", filename: str = "sql_diagram.json"):
+    try:
+        if not os.path.isabs(path):
+             path = os.path.abspath(path)
+        filepath = os.path.join(path, filename)
+        if not os.path.exists(filepath):
+            return {"nodes": [], "edges": [], "viewport": {"x": 0, "y": 0, "zoom": 1}, "metadata": {}}
+        with open(filepath, "r") as f:
+            data = json.load(f)
+        return data
+    except Exception as e:
+        print(f"Error loading graph: {e}")
+        return {"nodes": [], "edges": [], "viewport": {"x": 0, "y": 0, "zoom": 1}, "metadata": {}}
+@app.get("/config_files")
+def list_config_files(path: str = "."):
+    try:
+        if not os.path.isabs(path):
+             path = os.path.abspath(path)
+        if not os.path.exists(path):
+            return {"files": []}
+        files = [f for f in os.listdir(path) if f.endswith(".json") and os.path.isfile(os.path.join(path, f))]
+        return {"files": files}
+    except Exception as e:
+        print(f"Error listing config files: {e}")
+        return {"files": []}
+# Serve Static Files (Frontend)
+if os.path.exists(STATIC_DIR):
+    app.mount("/assets", StaticFiles(directory=os.path.join(STATIC_DIR, "assets")), name="assets")
+    # Catch-all for SPA routing
+    @app.get("/{full_path:path}")
+    async def serve_spa(full_path: str):
+        file_path = os.path.join(STATIC_DIR, full_path)
+        if os.path.isfile(file_path):
+            return FileResponse(file_path)
+        return FileResponse(os.path.join(STATIC_DIR, "index.html"))
+def start():
+    """Entry point for the CLI tool."""
+    global CURRENT_DIRECTORY
+    # CLI Argument Parsing
+    if len(sys.argv) > 1:
+        path_arg = sys.argv[1]
+        if os.path.exists(path_arg):
+            CURRENT_DIRECTORY = os.path.abspath(path_arg)
+            print(f"Setting project path from CLI: {CURRENT_DIRECTORY}")
+        else:
+            print(f"Warning: Path '{path_arg}' does not exist. Using defaults.")
+    else:
+        CURRENT_DIRECTORY = os.getcwd()
+        print(f"Using current directory: {CURRENT_DIRECTORY}")
+    def open_browser():
+        time.sleep(1.5)
+        webbrowser.open("http://localhost:8000")
+    threading.Thread(target=open_browser, daemon=True).start()
+    # Run uvicorn programmatically
+    # Note: When running programmatically, reload=True is not supported easily without other hacks
+    uvicorn.run(app, host="127.0.0.1", port=8000)
+# Allow running the module directly (python -m sql_dag_flow.main) in addition to
+# the "sql-dag-flow" console script, which points at this same start() function.
+if __name__ == "__main__":
+    start()