sqlh 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlh-0.2.3/PKG-INFO +338 -0
- sqlh-0.2.3/README.md +328 -0
- sqlh-0.2.3/pyproject.toml +41 -0
- sqlh-0.2.3/sqlh/.DS_Store +0 -0
- sqlh-0.2.3/sqlh/__init__.py +32 -0
- sqlh-0.2.3/sqlh/cli.py +153 -0
- sqlh-0.2.3/sqlh/core/graph.py +385 -0
- sqlh-0.2.3/sqlh/core/helper.py +444 -0
- sqlh-0.2.3/sqlh/core/keywords.py +62 -0
- sqlh-0.2.3/sqlh/static/dagre_template.html +857 -0
- sqlh-0.2.3/sqlh/static/mermaid_template.html +28 -0
- sqlh-0.2.3/sqlh/tests/test_cli.py +3 -0
- sqlh-0.2.3/sqlh/tests/test_graph.py +55 -0
- sqlh-0.2.3/sqlh/tests/test_import.py +16 -0
- sqlh-0.2.3/sqlh/tests/test_sqlhelper.py +36 -0
- sqlh-0.2.3/sqlh/tests/test_utils.py +86 -0
- sqlh-0.2.3/sqlh/utils.py +365 -0
sqlh-0.2.3/PKG-INFO
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: sqlh
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
|
|
5
|
+
Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
|
|
6
|
+
Maintainer: Perry DU
|
|
7
|
+
Maintainer-email: Perry DU <duneite@gmail.com>
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# mini-sqllineage
|
|
12
|
+
|
|
13
|
+
[](https://pypi.org/project/sqlh/)
|
|
14
|
+
[](https://pypi.org/project/sqlh/)
|
|
15
|
+
[](https://github.com/dupen01/mini-sqllineage/blob/main/LICENSE)
|
|
16
|
+
[](https://github.com/dupen01/mini-sqllineage/actions)
|
|
17
|
+
|
|
18
|
+
A lightweight Python library for analyzing SQL lineage and tracking table dependencies in data pipelines.
|
|
19
|
+
|
|
20
|
+
## Features
|
|
21
|
+
|
|
22
|
+
- 📊 **SQL Lineage Analysis**: Parse SQL statements and extract table dependencies
|
|
23
|
+
- 🔄 **DAG Visualization**: Visualize data lineage as Directed Acyclic Graph
|
|
24
|
+
- 🔍 **Dependency Search**: Find upstream/downstream tables and related dependencies
|
|
25
|
+
- 🎯 **Root/Leaf Detection**: Identify source tables (ODS) and target tables (ADS)
|
|
26
|
+
- 🖥️ **CLI Tool**: Command-line interface for quick analysis
|
|
27
|
+
- 🌐 **Web Visualization**: Interactive web UI for exploring lineage
|
|
28
|
+
- ⚡ **Fast**: Token-based parsing, lightweight and efficient
|
|
29
|
+
|
|
30
|
+
## Installation
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install sqlh
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
### Python API
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
from sqlh import (
|
|
42
|
+
get_all_tables,
|
|
43
|
+
get_all_root_tables,
|
|
44
|
+
search_related_upstream_tables,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Get all tables from SQL
|
|
48
|
+
sql = """
|
|
49
|
+
INSERT INTO dwd.user_dim SELECT * FROM ods.user;
|
|
50
|
+
INSERT INTO ads.user_report SELECT * FROM dwd.user_dim;
|
|
51
|
+
"""
|
|
52
|
+
tables = get_all_tables(sql)
|
|
53
|
+
print(tables) # ['ods.user', 'dwd.user_dim', 'ads.user_report']
|
|
54
|
+
|
|
55
|
+
# Get root tables (no upstream dependencies)
|
|
56
|
+
root_tables = get_all_root_tables(sql)
|
|
57
|
+
print(root_tables) # ['ods.user']
|
|
58
|
+
|
|
59
|
+
# Search upstream dependencies
|
|
60
|
+
upstream = search_related_upstream_tables(sql, 'ads.user_report')
|
|
61
|
+
print(upstream[0]) # ['dwd.user_dim', 'ods.user']
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### CLI Usage
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# List all tables
|
|
68
|
+
sqlh list --all -p /path/to/sql/files
|
|
69
|
+
|
|
70
|
+
# List root tables
|
|
71
|
+
sqlh list --root -p /path/to/sql/files
|
|
72
|
+
|
|
73
|
+
# Search upstream tables
|
|
74
|
+
sqlh search --upstream -t ads.user_report -p /path/to/sql/files
|
|
75
|
+
|
|
76
|
+
# Open web visualization
|
|
77
|
+
sqlh web -p /path/to/sql/files
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## API Reference
|
|
81
|
+
|
|
82
|
+
### Core Functions
|
|
83
|
+
|
|
84
|
+
| Function | Description |
|
|
85
|
+
|----------|-------------|
|
|
86
|
+
| `get_all_tables(sql)` | Get all tables from SQL statements |
|
|
87
|
+
| `get_all_root_tables(sql)` | Get tables with no upstream dependencies |
|
|
88
|
+
| `get_all_leaf_tables(sql)` | Get tables with no downstream dependencies |
|
|
89
|
+
| `search_related_tables(sql, table)` | Search all related tables (upstream + downstream) |
|
|
90
|
+
| `search_related_upstream_tables(sql, table)` | Search upstream dependencies |
|
|
91
|
+
| `search_related_downstream_tables(sql, table)` | Search downstream dependents |
|
|
92
|
+
| `search_related_root_tables(sql, table)` | Search root tables in the dependency path |
|
|
93
|
+
| `read_sql_from_directory(path)` | Read SQL files from directory |
|
|
94
|
+
|
|
95
|
+
### DagGraph Class
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from sqlh import DagGraph
|
|
99
|
+
|
|
100
|
+
dag = DagGraph()
|
|
101
|
+
dag.add_edge("table_a", "table_b")
|
|
102
|
+
dag.add_edge("table_b", "table_c")
|
|
103
|
+
|
|
104
|
+
# Export to Mermaid
|
|
105
|
+
mermaid_str = dag.to_mermaid()
|
|
106
|
+
|
|
107
|
+
# Export to HTML
|
|
108
|
+
html_content = dag.to_html()
|
|
109
|
+
|
|
110
|
+
# Find upstream/downstream
|
|
111
|
+
upstream = dag.find_upstream("table_c")
|
|
112
|
+
downstream = dag.find_downstream("table_a")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## CLI Commands
|
|
116
|
+
|
|
117
|
+
### List Tables
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Get all tables
|
|
121
|
+
sqlh list --all -p /path/to/sql/files
|
|
122
|
+
|
|
123
|
+
# Get root tables
|
|
124
|
+
sqlh list --root -p /path/to/sql/files
|
|
125
|
+
|
|
126
|
+
# Get leaf tables
|
|
127
|
+
sqlh list --leaf -p /path/to/sql/files
|
|
128
|
+
|
|
129
|
+
# Output formats
|
|
130
|
+
sqlh list --all -p /path/to/sql/files --output-format json
|
|
131
|
+
sqlh list --all -p /path/to/sql/files --output-format text
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Search Tables
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# Search upstream tables
|
|
138
|
+
sqlh search --upstream -t table_name -p /path/to/sql/files
|
|
139
|
+
|
|
140
|
+
# Search downstream tables
|
|
141
|
+
sqlh search --downstream -t table_name -p /path/to/sql/files
|
|
142
|
+
|
|
143
|
+
# Search all related tables
|
|
144
|
+
sqlh search --all -t table_name -p /path/to/sql/files
|
|
145
|
+
|
|
146
|
+
# Search root tables
|
|
147
|
+
sqlh search --root -t table_name -p /path/to/sql/files
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Web Visualization
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
# Open web server
|
|
154
|
+
sqlh web -p /path/to/sql/files
|
|
155
|
+
|
|
156
|
+
# Specify HTML output path
|
|
157
|
+
sqlh web -p /path/to/sql/files --html-path ./custom.html
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Output Formats
|
|
161
|
+
|
|
162
|
+
### JSON Format
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"status": "ok",
|
|
167
|
+
"command": "list-tables",
|
|
168
|
+
"tables": ["table1", "table2"],
|
|
169
|
+
"meta": {
|
|
170
|
+
"table_count": 2
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Text Format
|
|
176
|
+
|
|
177
|
+
```
|
|
178
|
+
table1
|
|
179
|
+
table2
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Mermaid Format
|
|
183
|
+
|
|
184
|
+
```mermaid
|
|
185
|
+
graph LR
|
|
186
|
+
table1 --> table2
|
|
187
|
+
table2 --> table3
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Supported SQL Statements
|
|
191
|
+
|
|
192
|
+
- `SELECT` queries
|
|
193
|
+
- `INSERT INTO ... SELECT` statements
|
|
194
|
+
- `CREATE TABLE AS SELECT` (CTAS)
|
|
195
|
+
- `WITH ... AS` (CTE)
|
|
196
|
+
- `JOIN` operations
|
|
197
|
+
- Subqueries
|
|
198
|
+
|
|
199
|
+
## Development
|
|
200
|
+
|
|
201
|
+
### Setup
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Clone the repository
|
|
205
|
+
git clone https://github.com/dupen01/mini-sqllineage.git
|
|
206
|
+
cd mini-sqllineage
|
|
207
|
+
|
|
208
|
+
# Install in development mode
|
|
209
|
+
pip install -e ".[dev]"
|
|
210
|
+
|
|
211
|
+
# Run tests
|
|
212
|
+
pytest
|
|
213
|
+
|
|
214
|
+
# Run linting
|
|
215
|
+
ruff check .
|
|
216
|
+
|
|
217
|
+
# Run type checking
|
|
218
|
+
mypy sqlh
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Project Structure
|
|
222
|
+
|
|
223
|
+
```
|
|
224
|
+
mini-sqllineage/
|
|
225
|
+
├── sqlh/
|
|
226
|
+
│ ├── __init__.py
|
|
227
|
+
│ ├── cli.py # Command-line interface
|
|
228
|
+
│ ├── utils.py # Utility functions
|
|
229
|
+
│ └── core/
|
|
230
|
+
│ ├── graph.py # DAG implementation
|
|
231
|
+
│ ├── helper.py # SQL parser
|
|
232
|
+
│ └── keywords.py # SQL keywords
|
|
233
|
+
├── tests/ # Test suite
|
|
234
|
+
├── static/ # Web visualization templates
|
|
235
|
+
└── README.md
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Contributing
|
|
239
|
+
|
|
240
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
245
|
+
|
|
246
|
+
## Changelog
|
|
247
|
+
|
|
248
|
+
See [CHANGELOG.md](CHANGELOG.md) for a list of changes.
|
|
249
|
+
|
|
250
|
+
## TODO
|
|
251
|
+
|
|
252
|
+
- [ ] Fuzzy search for table names with suggestions
|
|
253
|
+
- [ ] Support for more SQL dialects (PostgreSQL, MySQL, etc.)
|
|
254
|
+
- [ ] Database schema import (DESCRIBE TABLE)
|
|
255
|
+
- [ ] Column-level lineage tracking
|
|
256
|
+
- [ ] CI/CD pipeline configuration
|
|
257
|
+
|
|
258
|
+
## Acknowledgments
|
|
259
|
+
|
|
260
|
+
- [Cytoscape.js](https://js.cytoscape.org/) - Graph visualization library
|
|
261
|
+
- [dagre.js](https://github.com/dagrejs/dagre) - Graph layout algorithm
|
|
262
|
+
- [Mermaid.js](https://mermaid.js.org/) - Diagram generation
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
# TODO
|
|
267
|
+
- [ ] 搜索指定表时考虑模糊匹配,当表名不存在时,返回提示或者给出可能的表名(相似度)
|
|
268
|
+
|
|
269
|
+
## CLI
|
|
270
|
+
|
|
271
|
+
### output 格式:
|
|
272
|
+
- json: 默认格式, 输出 json 格式
|
|
273
|
+
|
|
274
|
+
1. 搜索命令的输出
|
|
275
|
+
```json
|
|
276
|
+
{
|
|
277
|
+
"status": "ok",
|
|
278
|
+
"command": "search-table",
|
|
279
|
+
"data": {
|
|
280
|
+
"nodes": [
|
|
281
|
+
{"id": "table1", "label": "ods"},
|
|
282
|
+
{"id": "table2", "label": "dwd"}
|
|
283
|
+
],
|
|
284
|
+
"edges": [
|
|
285
|
+
{"source": "table1", "target": "table2"}
|
|
286
|
+
]
|
|
287
|
+
},
|
|
288
|
+
"mermaid": "graph LR\n table1 --> table2",
|
|
289
|
+
"meta": {
|
|
290
|
+
"node_count": 2
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
2. 列举命令的输出
|
|
297
|
+
```json
|
|
298
|
+
{
|
|
299
|
+
"status": "ok",
|
|
300
|
+
"command": "list-tables",
|
|
301
|
+
"tables": [
|
|
302
|
+
"table1",
|
|
303
|
+
"table2"
|
|
304
|
+
],
|
|
305
|
+
"meta": {
|
|
306
|
+
"table_count": 2
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
### CLI 参数
|
|
313
|
+
```bash
|
|
314
|
+
# 获取所有表名
|
|
315
|
+
sqlh list --all --path </path/to/sql-files> --output-format <json|text>
|
|
316
|
+
|
|
317
|
+
# 获取所有 root 表名
|
|
318
|
+
sqlh list --root --path </path/to/sql-files> --output-format <json|text>
|
|
319
|
+
|
|
320
|
+
# 获取所有 leaf 表名
|
|
321
|
+
sqlh list --leaf --path </path/to/sql-files> --output-format <json|text>
|
|
322
|
+
|
|
323
|
+
# 搜索指定表的root 表名
|
|
324
|
+
sqlh search --root --path </path/to/sql-files> --table <table-name> --output-format <json|text>
|
|
325
|
+
|
|
326
|
+
# 搜索指定表的所有上游表名
|
|
327
|
+
sqlh search --upstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
328
|
+
|
|
329
|
+
# 搜索指定表的所有下游表名
|
|
330
|
+
sqlh search --downstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# 搜索指定表的所有相关表
|
|
334
|
+
sqlh search --all --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
335
|
+
|
|
336
|
+
# 打开全部血缘关系图 web
|
|
337
|
+
sqlh web --path </path/to/sql-files>
|
|
338
|
+
```
|
sqlh-0.2.3/README.md
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
# mini-sqllineage
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/sqlh/)
|
|
4
|
+
[](https://pypi.org/project/sqlh/)
|
|
5
|
+
[](https://github.com/dupen01/mini-sqllineage/blob/main/LICENSE)
|
|
6
|
+
[](https://github.com/dupen01/mini-sqllineage/actions)
|
|
7
|
+
|
|
8
|
+
A lightweight Python library for analyzing SQL lineage and tracking table dependencies in data pipelines.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- 📊 **SQL Lineage Analysis**: Parse SQL statements and extract table dependencies
|
|
13
|
+
- 🔄 **DAG Visualization**: Visualize data lineage as Directed Acyclic Graph
|
|
14
|
+
- 🔍 **Dependency Search**: Find upstream/downstream tables and related dependencies
|
|
15
|
+
- 🎯 **Root/Leaf Detection**: Identify source tables (ODS) and target tables (ADS)
|
|
16
|
+
- 🖥️ **CLI Tool**: Command-line interface for quick analysis
|
|
17
|
+
- 🌐 **Web Visualization**: Interactive web UI for exploring lineage
|
|
18
|
+
- ⚡ **Fast**: Token-based parsing, lightweight and efficient
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install sqlh
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
### Python API
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from sqlh import (
|
|
32
|
+
get_all_tables,
|
|
33
|
+
get_all_root_tables,
|
|
34
|
+
search_related_upstream_tables,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Get all tables from SQL
|
|
38
|
+
sql = """
|
|
39
|
+
INSERT INTO dwd.user_dim SELECT * FROM ods.user;
|
|
40
|
+
INSERT INTO ads.user_report SELECT * FROM dwd.user_dim;
|
|
41
|
+
"""
|
|
42
|
+
tables = get_all_tables(sql)
|
|
43
|
+
print(tables) # ['ods.user', 'dwd.user_dim', 'ads.user_report']
|
|
44
|
+
|
|
45
|
+
# Get root tables (no upstream dependencies)
|
|
46
|
+
root_tables = get_all_root_tables(sql)
|
|
47
|
+
print(root_tables) # ['ods.user']
|
|
48
|
+
|
|
49
|
+
# Search upstream dependencies
|
|
50
|
+
upstream = search_related_upstream_tables(sql, 'ads.user_report')
|
|
51
|
+
print(upstream[0]) # ['dwd.user_dim', 'ods.user']
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### CLI Usage
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# List all tables
|
|
58
|
+
sqlh list --all -p /path/to/sql/files
|
|
59
|
+
|
|
60
|
+
# List root tables
|
|
61
|
+
sqlh list --root -p /path/to/sql/files
|
|
62
|
+
|
|
63
|
+
# Search upstream tables
|
|
64
|
+
sqlh search --upstream -t ads.user_report -p /path/to/sql/files
|
|
65
|
+
|
|
66
|
+
# Open web visualization
|
|
67
|
+
sqlh web -p /path/to/sql/files
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## API Reference
|
|
71
|
+
|
|
72
|
+
### Core Functions
|
|
73
|
+
|
|
74
|
+
| Function | Description |
|
|
75
|
+
|----------|-------------|
|
|
76
|
+
| `get_all_tables(sql)` | Get all tables from SQL statements |
|
|
77
|
+
| `get_all_root_tables(sql)` | Get tables with no upstream dependencies |
|
|
78
|
+
| `get_all_leaf_tables(sql)` | Get tables with no downstream dependencies |
|
|
79
|
+
| `search_related_tables(sql, table)` | Search all related tables (upstream + downstream) |
|
|
80
|
+
| `search_related_upstream_tables(sql, table)` | Search upstream dependencies |
|
|
81
|
+
| `search_related_downstream_tables(sql, table)` | Search downstream dependents |
|
|
82
|
+
| `search_related_root_tables(sql, table)` | Search root tables in the dependency path |
|
|
83
|
+
| `read_sql_from_directory(path)` | Read SQL files from directory |
|
|
84
|
+
|
|
85
|
+
### DagGraph Class
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from sqlh import DagGraph
|
|
89
|
+
|
|
90
|
+
dag = DagGraph()
|
|
91
|
+
dag.add_edge("table_a", "table_b")
|
|
92
|
+
dag.add_edge("table_b", "table_c")
|
|
93
|
+
|
|
94
|
+
# Export to Mermaid
|
|
95
|
+
mermaid_str = dag.to_mermaid()
|
|
96
|
+
|
|
97
|
+
# Export to HTML
|
|
98
|
+
html_content = dag.to_html()
|
|
99
|
+
|
|
100
|
+
# Find upstream/downstream
|
|
101
|
+
upstream = dag.find_upstream("table_c")
|
|
102
|
+
downstream = dag.find_downstream("table_a")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## CLI Commands
|
|
106
|
+
|
|
107
|
+
### List Tables
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Get all tables
|
|
111
|
+
sqlh list --all -p /path/to/sql/files
|
|
112
|
+
|
|
113
|
+
# Get root tables
|
|
114
|
+
sqlh list --root -p /path/to/sql/files
|
|
115
|
+
|
|
116
|
+
# Get leaf tables
|
|
117
|
+
sqlh list --leaf -p /path/to/sql/files
|
|
118
|
+
|
|
119
|
+
# Output formats
|
|
120
|
+
sqlh list --all -p /path/to/sql/files --output-format json
|
|
121
|
+
sqlh list --all -p /path/to/sql/files --output-format text
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Search Tables
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
# Search upstream tables
|
|
128
|
+
sqlh search --upstream -t table_name -p /path/to/sql/files
|
|
129
|
+
|
|
130
|
+
# Search downstream tables
|
|
131
|
+
sqlh search --downstream -t table_name -p /path/to/sql/files
|
|
132
|
+
|
|
133
|
+
# Search all related tables
|
|
134
|
+
sqlh search --all -t table_name -p /path/to/sql/files
|
|
135
|
+
|
|
136
|
+
# Search root tables
|
|
137
|
+
sqlh search --root -t table_name -p /path/to/sql/files
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Web Visualization
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Open web server
|
|
144
|
+
sqlh web -p /path/to/sql/files
|
|
145
|
+
|
|
146
|
+
# Specify HTML output path
|
|
147
|
+
sqlh web -p /path/to/sql/files --html-path ./custom.html
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Output Formats
|
|
151
|
+
|
|
152
|
+
### JSON Format
|
|
153
|
+
|
|
154
|
+
```json
|
|
155
|
+
{
|
|
156
|
+
"status": "ok",
|
|
157
|
+
"command": "list-tables",
|
|
158
|
+
"tables": ["table1", "table2"],
|
|
159
|
+
"meta": {
|
|
160
|
+
"table_count": 2
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Text Format
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
table1
|
|
169
|
+
table2
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Mermaid Format
|
|
173
|
+
|
|
174
|
+
```mermaid
|
|
175
|
+
graph LR
|
|
176
|
+
table1 --> table2
|
|
177
|
+
table2 --> table3
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## Supported SQL Statements
|
|
181
|
+
|
|
182
|
+
- `SELECT` queries
|
|
183
|
+
- `INSERT INTO ... SELECT` statements
|
|
184
|
+
- `CREATE TABLE AS SELECT` (CTAS)
|
|
185
|
+
- `WITH ... AS` (CTE)
|
|
186
|
+
- `JOIN` operations
|
|
187
|
+
- Subqueries
|
|
188
|
+
|
|
189
|
+
## Development
|
|
190
|
+
|
|
191
|
+
### Setup
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Clone the repository
|
|
195
|
+
git clone https://github.com/dupen01/mini-sqllineage.git
|
|
196
|
+
cd mini-sqllineage
|
|
197
|
+
|
|
198
|
+
# Install in development mode
|
|
199
|
+
pip install -e ".[dev]"
|
|
200
|
+
|
|
201
|
+
# Run tests
|
|
202
|
+
pytest
|
|
203
|
+
|
|
204
|
+
# Run linting
|
|
205
|
+
ruff check .
|
|
206
|
+
|
|
207
|
+
# Run type checking
|
|
208
|
+
mypy sqlh
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Project Structure
|
|
212
|
+
|
|
213
|
+
```
|
|
214
|
+
mini-sqllineage/
|
|
215
|
+
├── sqlh/
|
|
216
|
+
│ ├── __init__.py
|
|
217
|
+
│ ├── cli.py # Command-line interface
|
|
218
|
+
│ ├── utils.py # Utility functions
|
|
219
|
+
│ └── core/
|
|
220
|
+
│ ├── graph.py # DAG implementation
|
|
221
|
+
│ ├── helper.py # SQL parser
|
|
222
|
+
│ └── keywords.py # SQL keywords
|
|
223
|
+
├── tests/ # Test suite
|
|
224
|
+
├── static/ # Web visualization templates
|
|
225
|
+
└── README.md
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Contributing
|
|
229
|
+
|
|
230
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
231
|
+
|
|
232
|
+
## License
|
|
233
|
+
|
|
234
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
235
|
+
|
|
236
|
+
## Changelog
|
|
237
|
+
|
|
238
|
+
See [CHANGELOG.md](CHANGELOG.md) for a list of changes.
|
|
239
|
+
|
|
240
|
+
## TODO
|
|
241
|
+
|
|
242
|
+
- [ ] Fuzzy search for table names with suggestions
|
|
243
|
+
- [ ] Support for more SQL dialects (PostgreSQL, MySQL, etc.)
|
|
244
|
+
- [ ] Database schema import (DESCRIBE TABLE)
|
|
245
|
+
- [ ] Column-level lineage tracking
|
|
246
|
+
- [ ] CI/CD pipeline configuration
|
|
247
|
+
|
|
248
|
+
## Acknowledgments
|
|
249
|
+
|
|
250
|
+
- [Cytoscape.js](https://js.cytoscape.org/) - Graph visualization library
|
|
251
|
+
- [dagre.js](https://github.com/dagrejs/dagre) - Graph layout algorithm
|
|
252
|
+
- [Mermaid.js](https://mermaid.js.org/) - Diagram generation
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
# TODO
|
|
257
|
+
- [ ] 搜索指定表时考虑模糊匹配,当表名不存在时,返回提示或者给出可能的表名(相似度)
|
|
258
|
+
|
|
259
|
+
## CLI
|
|
260
|
+
|
|
261
|
+
### output 格式:
|
|
262
|
+
- json: 默认格式, 输出 json 格式
|
|
263
|
+
|
|
264
|
+
1. 搜索命令的输出
|
|
265
|
+
```json
|
|
266
|
+
{
|
|
267
|
+
"status": "ok",
|
|
268
|
+
"command": "search-table",
|
|
269
|
+
"data": {
|
|
270
|
+
"nodes": [
|
|
271
|
+
{"id": "table1", "label": "ods"},
|
|
272
|
+
{"id": "table2", "label": "dwd"}
|
|
273
|
+
],
|
|
274
|
+
"edges": [
|
|
275
|
+
{"source": "table1", "target": "table2"}
|
|
276
|
+
]
|
|
277
|
+
},
|
|
278
|
+
"mermaid": "graph LR\n table1 --> table2",
|
|
279
|
+
"meta": {
|
|
280
|
+
"node_count": 2
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
2. 列举命令的输出
|
|
287
|
+
```json
|
|
288
|
+
{
|
|
289
|
+
"status": "ok",
|
|
290
|
+
"command": "list-tables",
|
|
291
|
+
"tables": [
|
|
292
|
+
"table1",
|
|
293
|
+
"table2"
|
|
294
|
+
],
|
|
295
|
+
"meta": {
|
|
296
|
+
"table_count": 2
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
### CLI 参数
|
|
303
|
+
```bash
|
|
304
|
+
# 获取所有表名
|
|
305
|
+
sqlh list --all --path </path/to/sql-files> --output-format <json|text>
|
|
306
|
+
|
|
307
|
+
# 获取所有 root 表名
|
|
308
|
+
sqlh list --root --path </path/to/sql-files> --output-format <json|text>
|
|
309
|
+
|
|
310
|
+
# 获取所有 leaf 表名
|
|
311
|
+
sqlh list --leaf --path </path/to/sql-files> --output-format <json|text>
|
|
312
|
+
|
|
313
|
+
# 搜索指定表的root 表名
|
|
314
|
+
sqlh search --root --path </path/to/sql-files> --table <table-name> --output-format <json|text>
|
|
315
|
+
|
|
316
|
+
# 搜索指定表的所有上游表名
|
|
317
|
+
sqlh search --upstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
318
|
+
|
|
319
|
+
# 搜索指定表的所有下游表名
|
|
320
|
+
sqlh search --downstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# 搜索指定表的所有相关表
|
|
324
|
+
sqlh search --all --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
|
|
325
|
+
|
|
326
|
+
# 打开全部血缘关系图 web
|
|
327
|
+
sqlh web --path </path/to/sql-files>
|
|
328
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sqlh"
|
|
3
|
+
version = "0.2.3"
|
|
4
|
+
# dynamic = ["version"]
|
|
5
|
+
maintainers = [
|
|
6
|
+
{name = "Perry DU", email = "duneite@gmail.com"}
|
|
7
|
+
]
|
|
8
|
+
description = "A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
keywords = [
|
|
12
|
+
"sql",
|
|
13
|
+
"lineage",
|
|
14
|
+
"data-pipeline",
|
|
15
|
+
"dag",
|
|
16
|
+
"dependency",
|
|
17
|
+
"database",
|
|
18
|
+
"etl",
|
|
19
|
+
"data-engineering"
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
dependencies = []
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
sqlh = "sqlh.cli:main"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
[dependency-groups]
|
|
29
|
+
dev = [
|
|
30
|
+
"pytest>=9.0.2",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
[build-system]
|
|
35
|
+
requires = ["uv_build>=0.10.9,<0.11.0"]
|
|
36
|
+
build-backend = "uv_build"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
[tool.uv.build-backend]
|
|
40
|
+
module-root = ""
|
|
41
|
+
module-name = "sqlh"
|
|
Binary file
|