sql-dag-flow 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Flavio Sandoval (@dsandovalflavio)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ recursive-include src/sql_dag_flow/static *
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.4
2
+ Name: sql-dag-flow
3
+ Version: 0.1.0
4
+ Summary: A sophisticated SQL lineage visualization tool for Medallion Architectures.
5
+ Author-email: Flavio Sandoval <dsandovalflavio@gmail.com>
6
+ License: MIT
7
+ Keywords: sql,lineage,dag,visualization,medallion-architecture,data-engineering
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: fastapi
20
+ Requires-Dist: uvicorn
21
+ Requires-Dist: sqlglot
22
+ Requires-Dist: networkx
23
+ Requires-Dist: pydantic
24
+ Dynamic: license-file
25
+
26
+ # SQL DAG Flow
27
+
28
+ **SQL DAG Flow** is a powerful, open-source visualization tool designed to automatically map and visualize the lineage of your SQL data pipelines.
29
+
30
+ Built specifically for modern data stacks using the **Medallion Architecture** (Bronze, Silver, Gold), it parses your SQL files to generate an interactive, dependency-aware Directed Acyclic Graph (DAG).
31
+
32
+ ![SQL DAG Flow Screenshot](/images/sql-architecture-logo.png)
33
+
34
+ ## 🚀 Key Features
35
+
36
+ * **Automatic Parsing & Visualization**: Recursively scans your project folders to find `.sql` files and detect dependencies (`FROM`, `JOIN`, `CTE`s) using `sqlglot`.
37
+ * **Medallion Architecture Support**: Automatically categorizes and colors nodes based on folder structure:
38
+ * 🟤 **Bronze**: Raw ingestion layers.
39
+ * ⚪ **Silver**: Cleaned and conformed data.
40
+ * 🟡 **Gold**: Business-level aggregates.
41
+ * **Smart Folder Selection**:
42
+ * **Selective Exploration**: Choose specific subfolders to analyze using an interactive tree view.
43
+ * **Deep Filtering**: Focus only on relevant parts of your pipeline.
44
+ * **Advanced Organization**:
45
+ * **Selection Toolbar**: Multi-select nodes and align them horizontally/vertically.
46
+ * **Node Hiding**: Hide specific nodes or entire trees to declutter the view.
47
+ * **Auto Layout**: Automatically arrange nodes using the Dagre layout engine.
48
+ * **Configuration Management**:
49
+ * **Save & Load**: Persist your layouts, hidden nodes, and viewport settings to JSON.
50
+ * **Workspaces**: Manage multiple project configurations.
51
+ * **Rich Metadata**:
52
+ * **Details Panel**: View full SQL content and schema details.
53
+ * **Annotations**: Add sticky notes with **Markdown support**, resize them, and create visual groups.
54
+ * **Visual Cues**:
55
+ * **Solid Border**: Indicates a Table.
56
+ * **Dashed Border**: Indicates a View (auto-detected).
57
+ * **Premium UI/UX**:
58
+ * **Dark/Light Modes**: Themed for your preference.
59
+ * **Export**: Save as high-resolution **PNG** or vector **SVG**.
60
+
61
+ ## 🌍 Supported Dialects
62
+
63
+ Powered by `sqlglot`, supporting:
64
+ * **BigQuery** (Default)
65
+ * **Snowflake**
66
+ * **PostgreSQL**
67
+ * **Spark / Databricks**
68
+ * **Amazon Redshift**
69
+ * **DuckDB**
70
+ * **MySQL**
71
+ * ...and more.
72
+
73
+ ## 📦 Installation
74
+
75
+ Install easily via `pip`:
76
+
77
+ ```bash
78
+ pip install sql-dag-flow
79
+ ```
80
+
81
+ ## ▶️ Usage
82
+
83
+ ### 1. Command Line Interface (CLI)
84
+
85
+ You can run the tool directly from your terminal:
86
+
87
+ ```bash
88
+ # Analyze the current directory
89
+ sql-dag-flow
90
+
91
+ # Analyze a specific SQL project
92
+ sql-dag-flow /path/to/my/dbt_project
93
+ ```
94
+
95
+ ### 2. Python API
96
+
97
+ Integrate it into your Python scripts or notebooks:
98
+
99
+ ```python
100
+ from sql_dag_flow.main import start
101
+
102
+ # Start the server and open the browser
103
+ start(directory="./my_sql_project")
104
+ ```
105
+
106
+ ## 📂 Project Structure Expectations
107
+
108
+ SQL DAG Flow is opinionated but flexible. It looks for standard Medallion Architecture naming conventions to assign colors:
109
+
110
+ * **Bronze Layer**: Any folder named `bronze`, `raw`, `landing`, or `staging`.
111
+ * **Silver Layer**: Any folder named `silver`, `intermediate`, or `conformed`.
112
+ * **Gold Layer**: Any folder named `gold`, `mart`, `serving`, or `presentation`.
113
+ * **Other**: Any other folder is categorized as "Other" (Gray).
114
+
115
+ ## 🛠️ Configuration & Customization
116
+
117
+ ### Settings
118
+ Click the **Settings (Gear)** icon in the bottom toolbar to:
119
+ * **Change SQL Dialect**: Ensure your specific SQL syntax is parsed correctly.
120
+ * **Toggle Node Style**: Switch between "Full" (colored body) and "Minimal" (colored border) styles.
121
+ * **Change Palette**: Switch between Standard, Vivid, and Pastel color palettes.
122
+
123
+ ### Saving Layouts
124
+ Your graph layout (positions, hidden nodes) is **not** permanent by default. To save your work:
125
+ 1. Click **Save** in the top bar.
126
+ 2. Choose a filename (e.g., `marketing_flow.json`).
127
+ 3. Next time, click **Load** to restore that exact view.
128
+
129
+ ## 🤝 Contributing
130
+
131
+ Contributions are welcome!
132
+ 1. Fork the repository.
133
+ 2. Create a feature branch.
134
+ 3. Submit a Pull Request.
135
+
136
+ ---
137
+ *Created by [Flavio Sandoval](https://github.com/dsandovalflavio)*
@@ -0,0 +1,112 @@
1
+ # SQL DAG Flow
2
+
3
+ **SQL DAG Flow** is a powerful, open-source visualization tool designed to automatically map and visualize the lineage of your SQL data pipelines.
4
+
5
+ Built specifically for modern data stacks using the **Medallion Architecture** (Bronze, Silver, Gold), it parses your SQL files to generate an interactive, dependency-aware Directed Acyclic Graph (DAG).
6
+
7
+ ![SQL DAG Flow Screenshot](/images/sql-architecture-logo.png)
8
+
9
+ ## 🚀 Key Features
10
+
11
+ * **Automatic Parsing & Visualization**: Recursively scans your project folders to find `.sql` files and detect dependencies (`FROM`, `JOIN`, `CTE`s) using `sqlglot`.
12
+ * **Medallion Architecture Support**: Automatically categorizes and colors nodes based on folder structure:
13
+ * 🟤 **Bronze**: Raw ingestion layers.
14
+ * ⚪ **Silver**: Cleaned and conformed data.
15
+ * 🟡 **Gold**: Business-level aggregates.
16
+ * **Smart Folder Selection**:
17
+ * **Selective Exploration**: Choose specific subfolders to analyze using an interactive tree view.
18
+ * **Deep Filtering**: Focus only on relevant parts of your pipeline.
19
+ * **Advanced Organization**:
20
+ * **Selection Toolbar**: Multi-select nodes and align them horizontally/vertically.
21
+ * **Node Hiding**: Hide specific nodes or entire trees to declutter the view.
22
+ * **Auto Layout**: Automatically arrange nodes using the Dagre layout engine.
23
+ * **Configuration Management**:
24
+ * **Save & Load**: Persist your layouts, hidden nodes, and viewport settings to JSON.
25
+ * **Workspaces**: Manage multiple project configurations.
26
+ * **Rich Metadata**:
27
+ * **Details Panel**: View full SQL content and schema details.
28
+ * **Annotations**: Add sticky notes with **Markdown support**, resize them, and create visual groups.
29
+ * **Visual Cues**:
30
+ * **Solid Border**: Indicates a Table.
31
+ * **Dashed Border**: Indicates a View (auto-detected).
32
+ * **Premium UI/UX**:
33
+ * **Dark/Light Modes**: Themed for your preference.
34
+ * **Export**: Save as high-resolution **PNG** or vector **SVG**.
35
+
36
+ ## 🌍 Supported Dialects
37
+
38
+ Powered by `sqlglot`, supporting:
39
+ * **BigQuery** (Default)
40
+ * **Snowflake**
41
+ * **PostgreSQL**
42
+ * **Spark / Databricks**
43
+ * **Amazon Redshift**
44
+ * **DuckDB**
45
+ * **MySQL**
46
+ * ...and more.
47
+
48
+ ## 📦 Installation
49
+
50
+ Install easily via `pip`:
51
+
52
+ ```bash
53
+ pip install sql-dag-flow
54
+ ```
55
+
56
+ ## ▶️ Usage
57
+
58
+ ### 1. Command Line Interface (CLI)
59
+
60
+ You can run the tool directly from your terminal:
61
+
62
+ ```bash
63
+ # Analyze the current directory
64
+ sql-dag-flow
65
+
66
+ # Analyze a specific SQL project
67
+ sql-dag-flow /path/to/my/dbt_project
68
+ ```
69
+
70
+ ### 2. Python API
71
+
72
+ Integrate it into your Python scripts or notebooks:
73
+
74
+ ```python
75
+ from sql_dag_flow.main import start
76
+
77
+ # Start the server and open the browser
78
+ start(directory="./my_sql_project")
79
+ ```
80
+
81
+ ## 📂 Project Structure Expectations
82
+
83
+ SQL DAG Flow is opinionated but flexible. It looks for standard Medallion Architecture naming conventions to assign colors:
84
+
85
+ * **Bronze Layer**: Any folder named `bronze`, `raw`, `landing`, or `staging`.
86
+ * **Silver Layer**: Any folder named `silver`, `intermediate`, or `conformed`.
87
+ * **Gold Layer**: Any folder named `gold`, `mart`, `serving`, or `presentation`.
88
+ * **Other**: Any other folder is categorized as "Other" (Gray).
89
+
90
+ ## 🛠️ Configuration & Customization
91
+
92
+ ### Settings
93
+ Click the **Settings (Gear)** icon in the bottom toolbar to:
94
+ * **Change SQL Dialect**: Ensure your specific SQL syntax is parsed correctly.
95
+ * **Toggle Node Style**: Switch between "Full" (colored body) and "Minimal" (colored border) styles.
96
+ * **Change Palette**: Switch between Standard, Vivid, and Pastel color palettes.
97
+
98
+ ### Saving Layouts
99
+ Your graph layout (positions, hidden nodes) is **not** permanent by default. To save your work:
100
+ 1. Click **Save** in the top bar.
101
+ 2. Choose a filename (e.g., `marketing_flow.json`).
102
+ 3. Next time, click **Load** to restore that exact view.
103
+
104
+ ## 🤝 Contributing
105
+
106
+ Contributions are welcome!
107
+ 1. Fork the repository.
108
+ 2. Create a feature branch.
109
+ 3. Submit a Pull Request.
110
+
111
+ ---
112
+ *Created by [Flavio Sandoval](https://github.com/dsandovalflavio)*
@@ -0,0 +1,42 @@
1
[build-system]
# The [project] table below is only understood by setuptools >= 61, and the
# recursive "**" glob used in [tool.setuptools.package-data] needs >= 62.3,
# so the previous ">=42" floor allowed builds that silently drop metadata/assets.
requires = ["setuptools>=62.3", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sql-dag-flow"
7
+ version = "0.1.0"
8
+ description = "A sophisticated SQL lineage visualization tool for Medallion Architectures."
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Flavio Sandoval", email = "dsandovalflavio@gmail.com"}
14
+ ]
15
+ keywords = ["sql", "lineage", "dag", "visualization", "medallion-architecture", "data-engineering"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ ]
26
+ dependencies = [
27
+ "fastapi",
28
+ "uvicorn",
29
+ "sqlglot",
30
+ "networkx",
31
+ "pydantic"
32
+ ]
33
+
34
+ [project.scripts]
35
+ sql-dag-flow = "sql_dag_flow.main:start"
36
+
37
+ [tool.setuptools.packages.find]
38
+ where = ["src"]
39
+ include = ["sql_dag_flow*"]
40
+
41
+ [tool.setuptools.package-data]
42
+ sql_dag_flow = ["static/**/*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,203 @@
1
+ from fastapi import FastAPI, HTTPException, Body
2
+ from pydantic import BaseModel
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.staticfiles import StaticFiles
5
+ from fastapi.responses import FileResponse
6
+ import uvicorn
7
+ import os
8
+ import sys
9
+ import json
10
+ import webbrowser
11
+ import threading
12
+ import time
13
+ from .parser import parse_sql_files, build_graph
14
+
15
+ app = FastAPI()
16
+
17
+ # Enable CORS for frontend
18
+ app.add_middleware(
19
+ CORSMiddleware,
20
+ allow_origins=["*"],
21
+ allow_credentials=True,
22
+ allow_methods=["*"],
23
+ allow_headers=["*"],
24
+ )
25
+
26
+ # Package structure
27
+ # __file__ is inside src/sql_dag_flow/main.py
28
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
29
+ STATIC_DIR = os.path.join(BASE_DIR, "static")
30
+
31
+ # Global state
32
+ CURRENT_DIRECTORY = os.getcwd() # Default, updated by start()
33
+ DIAGRAM_FILE = "sql_diagram.json"
34
+
35
@app.get("/graph")
def get_graph(dialect: str = "bigquery"):
    """Return the lineage graph for the SQL files under the current directory.

    ``dialect`` is forwarded unchanged to the parser. When the configured
    directory is missing, an empty graph with an ``error`` entry is returned
    instead of an HTTP error.
    """
    if os.path.exists(CURRENT_DIRECTORY):
        parsed_tables = parse_sql_files(CURRENT_DIRECTORY, dialect=dialect)
        graph_nodes, graph_edges = build_graph(parsed_tables)
        return {"nodes": graph_nodes, "edges": graph_edges}

    return {"nodes": [], "edges": [], "error": "Directory not found"}
44
+
45
@app.post("/config/path")
def set_path(path_data: dict = Body(...)):
    """Update the directory that subsequent graph requests will scan.

    Expects a JSON object with a ``path`` key.

    Raises:
        HTTPException: 400 when ``path`` is missing, is not a string, or does
            not name an existing directory.
    """
    global CURRENT_DIRECTORY
    path = path_data.get("path")
    # The body is arbitrary JSON: os.path functions treat ints/bools as file
    # descriptors and raise TypeError for lists/dicts, so insist on a string
    # before touching the filesystem. isdir() (rather than exists()) matches
    # the error message and keeps plain files from being accepted as a project.
    if not isinstance(path, str) or not path or not os.path.isdir(path):
        raise HTTPException(status_code=400, detail="Directory does not exist")

    CURRENT_DIRECTORY = path
    return {"message": "Path updated", "path": CURRENT_DIRECTORY}
57
+
58
@app.post("/scan/folders")
def scan_folders(path_data: dict = Body(...)):
    """List every non-hidden subfolder below a directory.

    Expects a JSON object with a ``path`` key. The result is
    ``{"folders": [...]}`` where each entry is a path relative to ``path``,
    uses forward slashes, and the list is sorted.

    Raises:
        HTTPException: 400 when ``path`` is missing, is not a string, or is
            not an existing directory; 500 if the walk itself fails.
    """
    path = path_data.get("path")
    # Reject non-string JSON values up front: os.path functions interpret
    # ints/bools as file descriptors and raise TypeError for other types.
    if not isinstance(path, str) or not path or not os.path.isdir(path):
        raise HTTPException(status_code=400, detail="Directory does not exist")

    try:
        subfolders = []
        for root, dirs, _files in os.walk(path):
            # Prune hidden folders in place so os.walk() never descends into them.
            dirs[:] = [d for d in dirs if not d.startswith('.')]
            # Relative to the requested root, with separators normalized to "/".
            subfolders.extend(
                os.path.relpath(os.path.join(root, d), path).replace(os.sep, '/')
                for d in dirs
            )

        # Sort for better UX
        subfolders.sort()
        return {"folders": subfolders}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
85
+
86
@app.post("/graph/filtered")
def get_filtered_graph(data: dict = Body(...)):
    """Return the lineage graph restricted to selected subfolders.

    Reads two optional keys from the JSON body: ``subfolders`` (passed to the
    parser as ``allowed_subfolders``; absent means ``None``) and ``dialect``
    (defaults to ``"bigquery"``). A missing project directory yields an empty
    graph with an ``error`` entry.
    """
    if not os.path.exists(CURRENT_DIRECTORY):
        return {"nodes": [], "edges": [], "error": "Directory not found"}

    parsed_tables = parse_sql_files(
        CURRENT_DIRECTORY,
        allowed_subfolders=data.get("subfolders"),
        dialect=data.get("dialect", "bigquery"),
    )
    graph_nodes, graph_edges = build_graph(parsed_tables)
    return {"nodes": graph_nodes, "edges": graph_edges}
97
+
98
@app.get("/config/path")
def get_path():
    """Report the directory currently configured for scanning."""
    configured_dir = CURRENT_DIRECTORY
    return {"path": configured_dir}
101
+
102
class SaveRequest(BaseModel):
    # Request body of POST /save. (Documented with comments rather than a
    # docstring so the generated API schema is unchanged.)

    # Diagram content; written to the output file as-is.
    nodes: list
    edges: list
    viewport: dict
    # Free-form metadata; save_graph reads its optional "path" entry to pick
    # the directory the file is written to.
    metadata: dict
    # Name of the JSON file to write; defaults to the standard diagram file.
    filename: str = "sql_diagram.json"
108
+
109
@app.post("/save")
def save_graph(request: SaveRequest):
    """Write the diagram (nodes, edges, viewport, metadata) to a JSON file.

    The target directory is ``request.metadata["path"]`` (default ``"."``,
    resolved to an absolute path) and the file name is ``request.filename``.

    Raises:
        HTTPException: 400 when ``filename`` is not a bare file name; 500 when
            the file cannot be written.
    """
    # The filename must be a plain name: os.path.join() would otherwise let
    # "../x.json" climb out of the chosen directory, and an absolute filename
    # would replace the directory entirely. Checked outside the try block so
    # the 400 is not converted into a 500 by the generic handler below.
    # NOTE(review): the directory itself still comes from the client.
    filename = request.filename
    if filename in ("", ".", "..") or os.path.basename(filename) != filename:
        raise HTTPException(status_code=400, detail="Invalid filename")

    try:
        # Use the path from metadata if available, otherwise default
        path = request.metadata.get("path", ".")
        if not os.path.isabs(path):
            path = os.path.abspath(path)

        filepath = os.path.join(path, filename)

        data = {
            "nodes": request.nodes,
            "edges": request.edges,
            "viewport": request.viewport,
            "metadata": request.metadata,
        }
        # Explicit encoding keeps the output independent of the platform locale.
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)
        return {"message": f"Graph saved successfully to {filepath}"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
130
+
131
@app.get("/load")
def load_graph(path: str = ".", filename: str = "sql_diagram.json"):
    """Load a previously saved diagram from ``<path>/<filename>``.

    A relative ``path`` is resolved to an absolute one. Loading is best-effort:
    if the file does not exist or cannot be read/parsed, an empty diagram with
    a default viewport is returned (read/parse errors are printed).
    """
    empty_diagram = {"nodes": [], "edges": [], "viewport": {"x": 0, "y": 0, "zoom": 1}, "metadata": {}}
    try:
        if not os.path.isabs(path):
            path = os.path.abspath(path)

        filepath = os.path.join(path, filename)

        if not os.path.exists(filepath):
            return empty_diagram

        # JSON text is UTF-8; decode it explicitly instead of relying on the
        # platform locale. "utf-8-sig" additionally tolerates a leading BOM
        # (e.g. from files edited by hand), which json.load() would reject.
        with open(filepath, "r", encoding="utf-8-sig") as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading graph: {e}")
        return empty_diagram
148
+
149
@app.get("/config_files")
def list_config_files(path: str = "."):
    """List the ``.json`` files located directly inside ``path``.

    A relative ``path`` is resolved to an absolute one. Returns
    ``{"files": [...]}`` with names sorted alphabetically; a missing directory
    or any error while listing yields an empty list (errors are printed).
    """
    try:
        if not os.path.isabs(path):
            path = os.path.abspath(path)

        if not os.path.exists(path):
            return {"files": []}

        # os.listdir() returns entries in arbitrary order; sort so the listing
        # is stable between calls, as scan_folders does for folders.
        files = sorted(
            name
            for name in os.listdir(path)
            if name.endswith(".json") and os.path.isfile(os.path.join(path, name))
        )
        return {"files": files}
    except Exception as e:
        print(f"Error listing config files: {e}")
        return {"files": []}
163
+
164
# Serve Static Files (Frontend)
if os.path.exists(STATIC_DIR):
    app.mount("/assets", StaticFiles(directory=os.path.join(STATIC_DIR, "assets")), name="assets")

    # Catch-all for SPA routing
    @app.get("/{full_path:path}")
    async def serve_spa(full_path: str):
        """Serve a file from the static directory, or index.html for SPA routes.

        ``full_path`` comes straight from the request URL, so it is resolved
        and confined to STATIC_DIR before anything is read from disk.
        """
        static_root = os.path.realpath(STATIC_DIR)
        # realpath() collapses ".." segments and symlinks; joining an absolute
        # full_path would discard static_root altogether, which the containment
        # check below also catches.
        candidate = os.path.realpath(os.path.join(static_root, full_path))
        try:
            inside_static = os.path.commonpath([static_root, candidate]) == static_root
        except ValueError:
            # Raised e.g. for paths on different drives: certainly not inside.
            inside_static = False

        if inside_static and os.path.isfile(candidate):
            return FileResponse(candidate)
        # Unknown or out-of-tree paths fall back to the SPA entry page.
        return FileResponse(os.path.join(STATIC_DIR, "index.html"))
175
+
176
def start(directory=None):
    """Entry point for the CLI tool and for programmatic use.

    Selects the project directory, opens the default browser after a short
    delay, and runs the web server on 127.0.0.1:8000 (blocking).

    Args:
        directory: Optional project directory (str or path-like). When given
            it takes precedence and the command line is ignored; when omitted,
            the first command-line argument is used, falling back to the
            current working directory.
    """
    global CURRENT_DIRECTORY

    if directory is not None:
        # Explicit argument: do not look at sys.argv, which then belongs to
        # whatever program is calling this function.
        requested, origin = os.fspath(directory), "argument"
    elif len(sys.argv) > 1:
        # CLI Argument Parsing
        requested, origin = sys.argv[1], "CLI"
    else:
        requested, origin = None, None

    if requested is None:
        CURRENT_DIRECTORY = os.getcwd()
        print(f"Using current directory: {CURRENT_DIRECTORY}")
    elif os.path.exists(requested):
        CURRENT_DIRECTORY = os.path.abspath(requested)
        print(f"Setting project path from {origin}: {CURRENT_DIRECTORY}")
    else:
        # Keep whatever CURRENT_DIRECTORY already holds (the module default).
        print(f"Warning: Path '{requested}' does not exist. Using defaults.")

    def open_browser():
        # Give the server a moment to start listening before opening the page.
        time.sleep(1.5)
        webbrowser.open("http://localhost:8000")

    # Daemon thread so it never keeps the process alive after the server stops.
    threading.Thread(target=open_browser, daemon=True).start()

    # Run uvicorn programmatically
    # Note: When running programmatically, reload=True is not supported easily without other hacks
    uvicorn.run(app, host="127.0.0.1", port=8000)


if __name__ == "__main__":
    start()