sqlh 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sqlh-0.2.3/PKG-INFO ADDED
@@ -0,0 +1,338 @@
1
+ Metadata-Version: 2.3
2
+ Name: sqlh
3
+ Version: 0.2.3
4
+ Summary: A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines
5
+ Keywords: sql,lineage,data-pipeline,dag,dependency,database,etl,data-engineering
6
+ Maintainer: Perry DU
7
+ Maintainer-email: Perry DU <duneite@gmail.com>
8
+ Requires-Python: >=3.9
9
+ Description-Content-Type: text/markdown
10
+
11
+ # mini-sqllineage
12
+
13
+ [![PyPI version](https://img.shields.io/pypi/v/sqlh.svg)](https://pypi.org/project/sqlh/)
14
+ [![Python version](https://img.shields.io/pypi/pyversions/sqlh.svg)](https://pypi.org/project/sqlh/)
15
+ [![License](https://img.shields.io/pypi/l/sqlh.svg)](https://github.com/dupen01/mini-sqllineage/blob/main/LICENSE)
16
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen.svg)](https://github.com/dupen01/mini-sqllineage/actions)
17
+
18
+ A lightweight Python library for analyzing SQL lineage and tracking table dependencies in data pipelines.
19
+
20
+ ## Features
21
+
22
+ - 📊 **SQL Lineage Analysis**: Parse SQL statements and extract table dependencies
23
+ - 🔄 **DAG Visualization**: Visualize data lineage as Directed Acyclic Graph
24
+ - 🔍 **Dependency Search**: Find upstream/downstream tables and related dependencies
25
+ - 🎯 **Root/Leaf Detection**: Identify source tables (ODS) and target tables (ADS)
26
+ - 🖥️ **CLI Tool**: Command-line interface for quick analysis
27
+ - 🌐 **Web Visualization**: Interactive web UI for exploring lineage
28
+ - ⚡ **Fast**: Token-based parsing, lightweight and efficient
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ pip install sqlh
34
+ ```
35
+
36
+ ## Quick Start
37
+
38
+ ### Python API
39
+
40
+ ```python
41
+ from sqlh import (
42
+ get_all_tables,
43
+ get_all_root_tables,
44
+ search_related_upstream_tables,
45
+ )
46
+
47
+ # Get all tables from SQL
48
+ sql = """
49
+ INSERT INTO dwd.user_dim SELECT * FROM ods.user;
50
+ INSERT INTO ads.user_report SELECT * FROM dwd.user_dim;
51
+ """
52
+ tables = get_all_tables(sql)
53
+ print(tables) # ['ods.user', 'dwd.user_dim', 'ads.user_report']
54
+
55
+ # Get root tables (no upstream dependencies)
56
+ root_tables = get_all_root_tables(sql)
57
+ print(root_tables) # ['ods.user']
58
+
59
+ # Search upstream dependencies
60
+ upstream = search_related_upstream_tables(sql, 'ads.user_report')
61
+ print(upstream[0]) # ['dwd.user_dim', 'ods.user']
62
+ ```
63
+
64
+ ### CLI Usage
65
+
66
+ ```bash
67
+ # List all tables
68
+ sqlh list --all -p /path/to/sql/files
69
+
70
+ # List root tables
71
+ sqlh list --root -p /path/to/sql/files
72
+
73
+ # Search upstream tables
74
+ sqlh search --upstream -t ads.user_report -p /path/to/sql/files
75
+
76
+ # Open web visualization
77
+ sqlh web -p /path/to/sql/files
78
+ ```
79
+
80
+ ## API Reference
81
+
82
+ ### Core Functions
83
+
84
+ | Function | Description |
85
+ |----------|-------------|
86
+ | `get_all_tables(sql)` | Get all tables from SQL statements |
87
+ | `get_all_root_tables(sql)` | Get tables with no upstream dependencies |
88
+ | `get_all_leaf_tables(sql)` | Get tables with no downstream dependencies |
89
+ | `search_related_tables(sql, table)` | Search all related tables (upstream + downstream) |
90
+ | `search_related_upstream_tables(sql, table)` | Search upstream dependencies |
91
+ | `search_related_downstream_tables(sql, table)` | Search downstream dependents |
92
+ | `search_related_root_tables(sql, table)` | Search root tables in the dependency path |
93
+ | `read_sql_from_directory(path)` | Read SQL files from directory |
94
+
95
+ ### DagGraph Class
96
+
97
+ ```python
98
+ from sqlh import DagGraph
99
+
100
+ dag = DagGraph()
101
+ dag.add_edge("table_a", "table_b")
102
+ dag.add_edge("table_b", "table_c")
103
+
104
+ # Export to Mermaid
105
+ mermaid_str = dag.to_mermaid()
106
+
107
+ # Export to HTML
108
+ html_content = dag.to_html()
109
+
110
+ # Find upstream/downstream
111
+ upstream = dag.find_upstream("table_c")
112
+ downstream = dag.find_downstream("table_a")
113
+ ```
114
+
115
+ ## CLI Commands
116
+
117
+ ### List Tables
118
+
119
+ ```bash
120
+ # Get all tables
121
+ sqlh list --all -p /path/to/sql/files
122
+
123
+ # Get root tables
124
+ sqlh list --root -p /path/to/sql/files
125
+
126
+ # Get leaf tables
127
+ sqlh list --leaf -p /path/to/sql/files
128
+
129
+ # Output formats
130
+ sqlh list --all -p /path/to/sql/files --output-format json
131
+ sqlh list --all -p /path/to/sql/files --output-format text
132
+ ```
133
+
134
+ ### Search Tables
135
+
136
+ ```bash
137
+ # Search upstream tables
138
+ sqlh search --upstream -t table_name -p /path/to/sql/files
139
+
140
+ # Search downstream tables
141
+ sqlh search --downstream -t table_name -p /path/to/sql/files
142
+
143
+ # Search all related tables
144
+ sqlh search --all -t table_name -p /path/to/sql/files
145
+
146
+ # Search root tables
147
+ sqlh search --root -t table_name -p /path/to/sql/files
148
+ ```
149
+
150
+ ### Web Visualization
151
+
152
+ ```bash
153
+ # Open web server
154
+ sqlh web -p /path/to/sql/files
155
+
156
+ # Specify HTML output path
157
+ sqlh web -p /path/to/sql/files --html-path ./custom.html
158
+ ```
159
+
160
+ ## Output Formats
161
+
162
+ ### JSON Format
163
+
164
+ ```json
165
+ {
166
+ "status": "ok",
167
+ "command": "list-tables",
168
+ "tables": ["table1", "table2"],
169
+ "meta": {
170
+ "table_count": 2
171
+ }
172
+ }
173
+ ```
174
+
175
+ ### Text Format
176
+
177
+ ```
178
+ table1
179
+ table2
180
+ ```
181
+
182
+ ### Mermaid Format
183
+
184
+ ```mermaid
185
+ graph LR
186
+ table1 --> table2
187
+ table2 --> table3
188
+ ```
189
+
190
+ ## Supported SQL Statements
191
+
192
+ - `SELECT` queries
193
+ - `INSERT INTO ... SELECT` statements
194
+ - `CREATE TABLE AS SELECT` (CTAS)
195
+ - `WITH ... AS` (CTE)
196
+ - `JOIN` operations
197
+ - Subqueries
198
+
199
+ ## Development
200
+
201
+ ### Setup
202
+
203
+ ```bash
204
+ # Clone the repository
205
+ git clone https://github.com/dupen01/mini-sqllineage.git
206
+ cd mini-sqllineage
207
+
208
+ # Install in development mode
209
+ pip install -e . --group dev
210
+
211
+ # Run tests
212
+ pytest
213
+
214
+ # Run linting
215
+ ruff check .
216
+
217
+ # Run type checking
218
+ mypy sqlh
219
+ ```
220
+
221
+ ### Project Structure
222
+
223
+ ```
224
+ mini-sqllineage/
225
+ ├── sqlh/
226
+ │ ├── __init__.py
227
+ │ ├── cli.py # Command-line interface
228
+ │ ├── utils.py # Utility functions
229
+ │ └── core/
230
+ │ ├── graph.py # DAG implementation
231
+ │ ├── helper.py # SQL parser
232
+ │ └── keywords.py # SQL keywords
233
+ ├── tests/ # Test suite
234
+ ├── static/ # Web visualization templates
235
+ └── README.md
236
+ ```
237
+
238
+ ## Contributing
239
+
240
+ Contributions are welcome! Please feel free to submit a Pull Request.
241
+
242
+ ## License
243
+
244
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
245
+
246
+ ## Changelog
247
+
248
+ See [CHANGELOG.md](CHANGELOG.md) for a list of changes.
249
+
250
+ ## TODO
251
+
252
+ - [ ] Fuzzy search for table names with suggestions
253
+ - [ ] Support for more SQL dialects (PostgreSQL, MySQL, etc.)
254
+ - [ ] Database schema import (DESCRIBE TABLE)
255
+ - [ ] Column-level lineage tracking
256
+ - [ ] CI/CD pipeline configuration
257
+
258
+ ## Acknowledgments
259
+
260
+ - [Cytoscape.js](https://js.cytoscape.org/) - Graph visualization library
261
+ - [dagre.js](https://github.com/dagrejs/dagre) - Graph layout algorithm
262
+ - [Mermaid.js](https://mermaid.js.org/) - Diagram generation
263
+
264
+ ---
265
+
266
+ # TODO
267
+ - [ ] 搜索指定表时考虑模糊匹配,当表名不存在时,返回提示或者给出可能的表名(相似度)
268
+
269
+ ## CLI
270
+
271
+ ### output 格式:
272
+ - json: 默认格式, 输出 json 格式
273
+
274
+ 1. 搜索命令的输出
275
+ ```json
276
+ {
277
+ "status": "ok",
278
+ "command": "search-table",
279
+ "data": {
280
+ "nodes": [
281
+ {"id": "table1", "label": "ods"},
282
+ {"id": "table2", "label": "dwd"}
283
+ ],
284
+ "edges": [
285
+ {"source": "table1", "target": "table2"}
286
+ ]
287
+ },
288
+ "mermaid": "graph LR\n table1 --> table2",
289
+ "meta": {
290
+ "node_count": 2
291
+ }
292
+ }
293
+ ```
294
+
295
+
296
+ 2. 列举命令的输出
297
+ ```json
298
+ {
299
+ "status": "ok",
300
+ "command": "list-tables",
301
+ "tables": [
302
+ "table1",
303
+ "table2"
304
+ ],
305
+ "meta": {
306
+ "table_count": 2
307
+ }
308
+ }
309
+
310
+ ```
311
+
312
+ ### CLI 参数
313
+ ```bash
314
+ # 获取所有表名
315
+ sqlh list --all --path </path/to/sql-files> --output-format <json|text>
316
+
317
+ # 获取所有 root 表名
318
+ sqlh list --root --path </path/to/sql-files> --output-format <json|text>
319
+
320
+ # 获取所有 leaf 表名
321
+ sqlh list --leaf --path </path/to/sql-files> --output-format <json|text>
322
+
323
+ # 搜索指定表的root 表名
324
+ sqlh search --root --path </path/to/sql-files> --table <table-name> --output-format <json|text>
325
+
326
+ # 搜索指定表的所有上游表名
327
+ sqlh search --upstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
328
+
329
+ # 搜索指定表的所有下游表名
330
+ sqlh search --downstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
331
+
332
+
333
+ # 搜索指定表的所有相关表
334
+ sqlh search --all --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
335
+
336
+ # 打开全部血缘关系图 web
337
+ sqlh web --path </path/to/sql-files>
338
+ ```
sqlh-0.2.3/README.md ADDED
@@ -0,0 +1,328 @@
1
+ # mini-sqllineage
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/sqlh.svg)](https://pypi.org/project/sqlh/)
4
+ [![Python version](https://img.shields.io/pypi/pyversions/sqlh.svg)](https://pypi.org/project/sqlh/)
5
+ [![License](https://img.shields.io/pypi/l/sqlh.svg)](https://github.com/dupen01/mini-sqllineage/blob/main/LICENSE)
6
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen.svg)](https://github.com/dupen01/mini-sqllineage/actions)
7
+
8
+ A lightweight Python library for analyzing SQL lineage and tracking table dependencies in data pipelines.
9
+
10
+ ## Features
11
+
12
+ - 📊 **SQL Lineage Analysis**: Parse SQL statements and extract table dependencies
13
+ - 🔄 **DAG Visualization**: Visualize data lineage as Directed Acyclic Graph
14
+ - 🔍 **Dependency Search**: Find upstream/downstream tables and related dependencies
15
+ - 🎯 **Root/Leaf Detection**: Identify source tables (ODS) and target tables (ADS)
16
+ - 🖥️ **CLI Tool**: Command-line interface for quick analysis
17
+ - 🌐 **Web Visualization**: Interactive web UI for exploring lineage
18
+ - ⚡ **Fast**: Token-based parsing, lightweight and efficient
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install sqlh
24
+ ```
25
+
26
+ ## Quick Start
27
+
28
+ ### Python API
29
+
30
+ ```python
31
+ from sqlh import (
32
+ get_all_tables,
33
+ get_all_root_tables,
34
+ search_related_upstream_tables,
35
+ )
36
+
37
+ # Get all tables from SQL
38
+ sql = """
39
+ INSERT INTO dwd.user_dim SELECT * FROM ods.user;
40
+ INSERT INTO ads.user_report SELECT * FROM dwd.user_dim;
41
+ """
42
+ tables = get_all_tables(sql)
43
+ print(tables) # ['ods.user', 'dwd.user_dim', 'ads.user_report']
44
+
45
+ # Get root tables (no upstream dependencies)
46
+ root_tables = get_all_root_tables(sql)
47
+ print(root_tables) # ['ods.user']
48
+
49
+ # Search upstream dependencies
50
+ upstream = search_related_upstream_tables(sql, 'ads.user_report')
51
+ print(upstream[0]) # ['dwd.user_dim', 'ods.user']
52
+ ```
53
+
54
+ ### CLI Usage
55
+
56
+ ```bash
57
+ # List all tables
58
+ sqlh list --all -p /path/to/sql/files
59
+
60
+ # List root tables
61
+ sqlh list --root -p /path/to/sql/files
62
+
63
+ # Search upstream tables
64
+ sqlh search --upstream -t ads.user_report -p /path/to/sql/files
65
+
66
+ # Open web visualization
67
+ sqlh web -p /path/to/sql/files
68
+ ```
69
+
70
+ ## API Reference
71
+
72
+ ### Core Functions
73
+
74
+ | Function | Description |
75
+ |----------|-------------|
76
+ | `get_all_tables(sql)` | Get all tables from SQL statements |
77
+ | `get_all_root_tables(sql)` | Get tables with no upstream dependencies |
78
+ | `get_all_leaf_tables(sql)` | Get tables with no downstream dependencies |
79
+ | `search_related_tables(sql, table)` | Search all related tables (upstream + downstream) |
80
+ | `search_related_upstream_tables(sql, table)` | Search upstream dependencies |
81
+ | `search_related_downstream_tables(sql, table)` | Search downstream dependents |
82
+ | `search_related_root_tables(sql, table)` | Search root tables in the dependency path |
83
+ | `read_sql_from_directory(path)` | Read SQL files from directory |
84
+
85
+ ### DagGraph Class
86
+
87
+ ```python
88
+ from sqlh import DagGraph
89
+
90
+ dag = DagGraph()
91
+ dag.add_edge("table_a", "table_b")
92
+ dag.add_edge("table_b", "table_c")
93
+
94
+ # Export to Mermaid
95
+ mermaid_str = dag.to_mermaid()
96
+
97
+ # Export to HTML
98
+ html_content = dag.to_html()
99
+
100
+ # Find upstream/downstream
101
+ upstream = dag.find_upstream("table_c")
102
+ downstream = dag.find_downstream("table_a")
103
+ ```
104
+
105
+ ## CLI Commands
106
+
107
+ ### List Tables
108
+
109
+ ```bash
110
+ # Get all tables
111
+ sqlh list --all -p /path/to/sql/files
112
+
113
+ # Get root tables
114
+ sqlh list --root -p /path/to/sql/files
115
+
116
+ # Get leaf tables
117
+ sqlh list --leaf -p /path/to/sql/files
118
+
119
+ # Output formats
120
+ sqlh list --all -p /path/to/sql/files --output-format json
121
+ sqlh list --all -p /path/to/sql/files --output-format text
122
+ ```
123
+
124
+ ### Search Tables
125
+
126
+ ```bash
127
+ # Search upstream tables
128
+ sqlh search --upstream -t table_name -p /path/to/sql/files
129
+
130
+ # Search downstream tables
131
+ sqlh search --downstream -t table_name -p /path/to/sql/files
132
+
133
+ # Search all related tables
134
+ sqlh search --all -t table_name -p /path/to/sql/files
135
+
136
+ # Search root tables
137
+ sqlh search --root -t table_name -p /path/to/sql/files
138
+ ```
139
+
140
+ ### Web Visualization
141
+
142
+ ```bash
143
+ # Open web server
144
+ sqlh web -p /path/to/sql/files
145
+
146
+ # Specify HTML output path
147
+ sqlh web -p /path/to/sql/files --html-path ./custom.html
148
+ ```
149
+
150
+ ## Output Formats
151
+
152
+ ### JSON Format
153
+
154
+ ```json
155
+ {
156
+ "status": "ok",
157
+ "command": "list-tables",
158
+ "tables": ["table1", "table2"],
159
+ "meta": {
160
+ "table_count": 2
161
+ }
162
+ }
163
+ ```
164
+
165
+ ### Text Format
166
+
167
+ ```
168
+ table1
169
+ table2
170
+ ```
171
+
172
+ ### Mermaid Format
173
+
174
+ ```mermaid
175
+ graph LR
176
+ table1 --> table2
177
+ table2 --> table3
178
+ ```
179
+
180
+ ## Supported SQL Statements
181
+
182
+ - `SELECT` queries
183
+ - `INSERT INTO ... SELECT` statements
184
+ - `CREATE TABLE AS SELECT` (CTAS)
185
+ - `WITH ... AS` (CTE)
186
+ - `JOIN` operations
187
+ - Subqueries
188
+
189
+ ## Development
190
+
191
+ ### Setup
192
+
193
+ ```bash
194
+ # Clone the repository
195
+ git clone https://github.com/dupen01/mini-sqllineage.git
196
+ cd mini-sqllineage
197
+
198
+ # Install in development mode
199
+ pip install -e . --group dev
200
+
201
+ # Run tests
202
+ pytest
203
+
204
+ # Run linting
205
+ ruff check .
206
+
207
+ # Run type checking
208
+ mypy sqlh
209
+ ```
210
+
211
+ ### Project Structure
212
+
213
+ ```
214
+ mini-sqllineage/
215
+ ├── sqlh/
216
+ │ ├── __init__.py
217
+ │ ├── cli.py # Command-line interface
218
+ │ ├── utils.py # Utility functions
219
+ │ └── core/
220
+ │ ├── graph.py # DAG implementation
221
+ │ ├── helper.py # SQL parser
222
+ │ └── keywords.py # SQL keywords
223
+ ├── tests/ # Test suite
224
+ ├── static/ # Web visualization templates
225
+ └── README.md
226
+ ```
227
+
228
+ ## Contributing
229
+
230
+ Contributions are welcome! Please feel free to submit a Pull Request.
231
+
232
+ ## License
233
+
234
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
235
+
236
+ ## Changelog
237
+
238
+ See [CHANGELOG.md](CHANGELOG.md) for a list of changes.
239
+
240
+ ## TODO
241
+
242
+ - [ ] Fuzzy search for table names with suggestions
243
+ - [ ] Support for more SQL dialects (PostgreSQL, MySQL, etc.)
244
+ - [ ] Database schema import (DESCRIBE TABLE)
245
+ - [ ] Column-level lineage tracking
246
+ - [ ] CI/CD pipeline configuration
247
+
248
+ ## Acknowledgments
249
+
250
+ - [Cytoscape.js](https://js.cytoscape.org/) - Graph visualization library
251
+ - [dagre.js](https://github.com/dagrejs/dagre) - Graph layout algorithm
252
+ - [Mermaid.js](https://mermaid.js.org/) - Diagram generation
253
+
254
+ ---
255
+
256
+ # TODO
257
+ - [ ] 搜索指定表时考虑模糊匹配,当表名不存在时,返回提示或者给出可能的表名(相似度)
258
+
259
+ ## CLI
260
+
261
+ ### output 格式:
262
+ - json: 默认格式, 输出 json 格式
263
+
264
+ 1. 搜索命令的输出
265
+ ```json
266
+ {
267
+ "status": "ok",
268
+ "command": "search-table",
269
+ "data": {
270
+ "nodes": [
271
+ {"id": "table1", "label": "ods"},
272
+ {"id": "table2", "label": "dwd"}
273
+ ],
274
+ "edges": [
275
+ {"source": "table1", "target": "table2"}
276
+ ]
277
+ },
278
+ "mermaid": "graph LR\n table1 --> table2",
279
+ "meta": {
280
+ "node_count": 2
281
+ }
282
+ }
283
+ ```
284
+
285
+
286
+ 2. 列举命令的输出
287
+ ```json
288
+ {
289
+ "status": "ok",
290
+ "command": "list-tables",
291
+ "tables": [
292
+ "table1",
293
+ "table2"
294
+ ],
295
+ "meta": {
296
+ "table_count": 2
297
+ }
298
+ }
299
+
300
+ ```
301
+
302
+ ### CLI 参数
303
+ ```bash
304
+ # 获取所有表名
305
+ sqlh list --all --path </path/to/sql-files> --output-format <json|text>
306
+
307
+ # 获取所有 root 表名
308
+ sqlh list --root --path </path/to/sql-files> --output-format <json|text>
309
+
310
+ # 获取所有 leaf 表名
311
+ sqlh list --leaf --path </path/to/sql-files> --output-format <json|text>
312
+
313
+ # 搜索指定表的root 表名
314
+ sqlh search --root --path </path/to/sql-files> --table <table-name> --output-format <json|text>
315
+
316
+ # 搜索指定表的所有上游表名
317
+ sqlh search --upstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
318
+
319
+ # 搜索指定表的所有下游表名
320
+ sqlh search --downstream --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
321
+
322
+
323
+ # 搜索指定表的所有相关表
324
+ sqlh search --all --path </path/to/sql-files> --table <table-name> --output-format <json|web|text>
325
+
326
+ # 打开全部血缘关系图 web
327
+ sqlh web --path </path/to/sql-files>
328
+ ```
@@ -0,0 +1,41 @@
1
+ [project]
2
+ name = "sqlh"
3
+ version = "0.2.3"
4
+ # dynamic = ["version"]
5
+ maintainers = [
6
+ {name = "Perry DU", email = "duneite@gmail.com"}
7
+ ]
8
+ description = "A lightweight SQL lineage analysis library for tracking table dependencies in data pipelines"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ keywords = [
12
+ "sql",
13
+ "lineage",
14
+ "data-pipeline",
15
+ "dag",
16
+ "dependency",
17
+ "database",
18
+ "etl",
19
+ "data-engineering"
20
+ ]
21
+
22
+ dependencies = []
23
+
24
+ [project.scripts]
25
+ sqlh = "sqlh.cli:main"
26
+
27
+
28
+ [dependency-groups]
29
+ dev = [
30
+ "pytest>=9.0.2",
31
+ ]
32
+
33
+
34
+ [build-system]
35
+ requires = ["uv_build>=0.10.9,<0.11.0"]
36
+ build-backend = "uv_build"
37
+
38
+
39
+ [tool.uv.build-backend]
40
+ module-root = ""
41
+ module-name = "sqlh"
Binary file