tff-dbt 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tff_dbt-0.2.0/.gitignore +10 -0
- tff_dbt-0.2.0/LICENSE +21 -0
- tff_dbt-0.2.0/PKG-INFO +221 -0
- tff_dbt-0.2.0/README.md +210 -0
- tff_dbt-0.2.0/pyproject.toml +26 -0
- tff_dbt-0.2.0/src/tff/dbt/__init__.py +9 -0
- tff_dbt-0.2.0/src/tff/dbt/cli.py +96 -0
- tff_dbt-0.2.0/src/tff/dbt/manifest.py +116 -0
- tff_dbt-0.2.0/src/tff/dbt/runner.py +103 -0
- tff_dbt-0.2.0/tests/test_dbt.py +176 -0
tff_dbt-0.2.0/.gitignore
ADDED
tff_dbt-0.2.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bart Schuijt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tff_dbt-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tff-dbt
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: dbt adapter for Transformation Fitness Functions (tff)
|
|
5
|
+
Author-email: Bart Schuijt <schuijt.bart@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.12
|
|
9
|
+
Requires-Dist: tff-core
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# sqlmesh-ff
|
|
13
|
+
|
|
14
|
+
[](https://pypi.org/project/sqlmesh-ff/)
|
|
15
|
+
[](https://pypi.org/project/sqlmesh-ff/)
|
|
16
|
+
|
|
17
|
+
Configurable fitness functions plugin for [SQLMesh](https://sqlmesh.com) projects.
|
|
18
|
+
|
|
19
|
+
Ships SQLMesh linter rules (classification macros, SQL complexity, metadata, naming) and architectural checks (layer integrity, custom exclusions, schema contracts, dependency graph) with a unified Rich lint report.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# Install from PyPI:
|
|
25
|
+
uv add sqlmesh-ff
|
|
26
|
+
|
|
27
|
+
# Or using pip:
|
|
28
|
+
pip install sqlmesh-ff
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Quick start
|
|
32
|
+
|
|
33
|
+
1. Add `fitness_functions.yaml` to your SQLMesh project root (see [Configuration](#configuration)).
|
|
34
|
+
2. Add a small `config.py` bootstrap (see [Where configuration lives](#where-configuration-lives)) — SQLMesh requires the loader as a Python class and cannot load `config.py` and `config.yaml` in the same folder.
|
|
35
|
+
3. Run lint:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
sqlmesh-ff lint
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Where configuration lives
|
|
42
|
+
|
|
43
|
+
Configuration is spread across the five layers listed in the table below. Only the YAML/JSON files in your project are user-editable settings.
|
|
44
|
+
|
|
45
|
+
| Layer | File | Role | You edit this? |
|
|
46
|
+
|-------|------|------|----------------|
|
|
47
|
+
| Plugin defaults | `sqlmesh_ff/config.py` (installed package) | Pydantic schema and built-in defaults (e.g. `fan_out_warn: 15`) | No — library code, never overwritten |
|
|
48
|
+
| SQLMesh project | `settings.yaml` | Gateways, `linter.rules`, variables, CI/CD bot | Yes — normal SQLMesh config |
|
|
49
|
+
| Fitness functions | `fitness_functions.yaml` | Thresholds, rule toggles, column naming/type rules, paths to JSON data | Yes — main FF config |
|
|
50
|
+
| Loader bootstrap | `config.py` (project root) | Loads `settings.yaml` and registers `FitnessLoader` | Rarely — ~15 lines of wiring |
|
|
51
|
+
| Contract data | `linter_contract_groups.json`, `linter_exclusions.json` | Repo-specific schema parity and dependency exclusions | Yes — project data |
|
|
52
|
+
|
|
53
|
+
**Merge order for fitness settings:** plugin defaults → `fitness_functions.yaml` → optional `loader_kwargs` overrides in `config.py`. Your YAML always wins over plugin defaults. The project `config.py` does not hold fitness thresholds — it only points at `fitness_functions.yaml`.
|
|
54
|
+
|
|
55
|
+
**Why `config.py` exists:** SQLMesh accepts `loader: FitnessLoader` only as a Python class, not as a YAML string. Because SQLMesh rejects having both `config.py` and `config.yaml` in one folder, projects use `settings.yaml` (SQLMesh settings) plus `config.py` (loader registration).
|
|
56
|
+
|
|
57
|
+
Example bootstrap:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from pathlib import Path
|
|
61
|
+
|
|
62
|
+
from sqlmesh.core.config import Config
|
|
63
|
+
from sqlmesh.utils.yaml import load as yaml_load
|
|
64
|
+
from sqlmesh_ff.loader import FitnessLoader
|
|
65
|
+
|
|
66
|
+
_settings = yaml_load(Path(__file__).parent / "settings.yaml")
|
|
67
|
+
config = Config.parse_obj(_settings).update_with({
|
|
68
|
+
"loader": FitnessLoader,
|
|
69
|
+
"loader_kwargs": {"fitness_functions_config": "fitness_functions.yaml"},
|
|
70
|
+
})
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Enable individual SQLMesh rules in `settings.yaml` under `linter.rules` / `linter.warn_rules`.
|
|
74
|
+
|
|
75
|
+
## Configuration
|
|
76
|
+
|
|
77
|
+
Fitness function settings live in `fitness_functions.yaml` at the project root. Override the file path or individual keys via `loader_kwargs` in `config.py` (advanced — most projects only set `fitness_functions_config`).
|
|
78
|
+
|
|
79
|
+
### Example `fitness_functions.yaml`
|
|
80
|
+
|
|
81
|
+
```yaml
|
|
82
|
+
contract_groups_path: linter_contract_groups.json
|
|
83
|
+
exclusions_path: linter_exclusions.json
|
|
84
|
+
|
|
85
|
+
layers:
|
|
86
|
+
order: [sources, derived, core, marts, export]
|
|
87
|
+
|
|
88
|
+
checks:
|
|
89
|
+
layer_integrity: { enabled: true }
|
|
90
|
+
custom_exclusions: { enabled: true }
|
|
91
|
+
schema_contracts: { enabled: true }
|
|
92
|
+
dependency_graph:
|
|
93
|
+
enabled: true
|
|
94
|
+
fan_out_warn: 15
|
|
95
|
+
fan_out_fail: 25
|
|
96
|
+
fan_in_warn: 10
|
|
97
|
+
|
|
98
|
+
rules:
|
|
99
|
+
classification_macros:
|
|
100
|
+
enabled: true
|
|
101
|
+
skip_layers: [sources]
|
|
102
|
+
columns:
|
|
103
|
+
product_type: "@product_type\\b|@PRODUCT_TYPE\\b"
|
|
104
|
+
sql_complexity:
|
|
105
|
+
enabled: true
|
|
106
|
+
thresholds:
|
|
107
|
+
decision_points: [15, 25]
|
|
108
|
+
cte_count: [8, 12]
|
|
109
|
+
join_count: [8, 12]
|
|
110
|
+
line_count: [250, 400]
|
|
111
|
+
mart_naming:
|
|
112
|
+
enabled: true
|
|
113
|
+
layer_name: marts
|
|
114
|
+
rule: prefix_with_subdirectory
|
|
115
|
+
column_names:
|
|
116
|
+
enabled: true
|
|
117
|
+
replacements: {}
|
|
118
|
+
column_types:
|
|
119
|
+
enabled: true
|
|
120
|
+
rules: []
|
|
121
|
+
equivalent_types:
|
|
122
|
+
text: [text, varchar]
|
|
123
|
+
metadata:
|
|
124
|
+
owner: true
|
|
125
|
+
description: true
|
|
126
|
+
grain: true
|
|
127
|
+
filename_equals_modelname:
|
|
128
|
+
enabled: true
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Project-specific JSON
|
|
132
|
+
|
|
133
|
+
Keep repo-specific contract and exclusion data in your project:
|
|
134
|
+
|
|
135
|
+
- `linter_contract_groups.json` — cross-model schema parity groups
|
|
136
|
+
- `linter_exclusions.json` — blocked dependency patterns and allowed exceptions
|
|
137
|
+
|
|
138
|
+
Reference their paths from `fitness_functions.yaml`. The plugin ships generic engines only; examples live in this README.
|
|
139
|
+
|
|
140
|
+
### Rule name mapping
|
|
141
|
+
|
|
142
|
+
SQLMesh uses lowercase class names in `linter.rules`:
|
|
143
|
+
|
|
144
|
+
| Config key | SQLMesh rule name |
|
|
145
|
+
|------------|-------------------|
|
|
146
|
+
| `classification_macros` | `classificationmacros` |
|
|
147
|
+
| `sql_complexity` | `sqlcomplexity` |
|
|
148
|
+
| `mart_naming` | `martmodelnamingconvention` |
|
|
149
|
+
| `column_names` | `columnnames` |
|
|
150
|
+
| `column_types` | `columntypes` |
|
|
151
|
+
| `metadata.owner` | `nomissingowner` |
|
|
152
|
+
| `metadata.description` | `nomissingdescription` |
|
|
153
|
+
| `metadata.grain` | `nomissinggrain` |
|
|
154
|
+
| `filename_equals_modelname` | `filenameequalsmodelname` |
|
|
155
|
+
|
|
156
|
+
## CLI
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
sqlmesh-ff lint [--project PATH] [--config PATH] [--checks CHECK,...] [--fail-level error|warning] [--group-by connascence|model]
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
- **Default:** all enabled checks plus SQLMesh linter rules
|
|
163
|
+
- **`--checks layer_integrity,custom_exclusions`:** run subset (for pre-push hooks)
|
|
164
|
+
- **`--fail-level warning`:** treat warnings as failures
|
|
165
|
+
- **`--group-by connascence|model`:** change how violations are grouped in the report (default: `connascence`)
|
|
166
|
+
|
|
167
|
+
## Integration example
|
|
168
|
+
|
|
169
|
+
Example overrides. `api_request` should always be named `api_call`. `_id` columns should always be of type `text` and `is_` columns should always be of type `boolean`.
|
|
170
|
+
|
|
171
|
+
```yaml
|
|
172
|
+
column_names:
|
|
173
|
+
replacements:
|
|
174
|
+
api_request: api_call
|
|
175
|
+
column_types:
|
|
176
|
+
rules:
|
|
177
|
+
- name: id_is_text
|
|
178
|
+
pattern: "_id$"
|
|
179
|
+
data_type: text
|
|
180
|
+
- name: boolean
|
|
181
|
+
pattern: "^is_"
|
|
182
|
+
data_type: boolean
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Examples
|
|
186
|
+
|
|
187
|
+
A complete, runnable example project showcasing the configuration of `sqlmesh-ff` rules, exclusions, contracts, and a continuous integration workflow is located in the [examples/](examples/) directory.
|
|
188
|
+
|
|
189
|
+
To run the linter against the example project locally, run:
|
|
190
|
+
```bash
|
|
191
|
+
sqlmesh-ff lint --project examples/minimal-sqlmesh-project
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
See [examples/minimal-sqlmesh-project/fitness_functions.yaml](examples/minimal-sqlmesh-project/fitness_functions.yaml) to inspect the configured rules.
|
|
195
|
+
|
|
196
|
+
## Development
|
|
197
|
+
|
|
198
|
+
Initialize your local environment and configure the Git pre-push hook:
|
|
199
|
+
```bash
|
|
200
|
+
make init
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Run linter, tests, or check diff coverage:
|
|
204
|
+
```bash
|
|
205
|
+
make lint
|
|
206
|
+
make test
|
|
207
|
+
make coverage
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Releases and PR titles
|
|
211
|
+
|
|
212
|
+
Releases are automated with [release-please](https://github.com/googleapis/release-please) on merges to `main`. Use [Conventional Commits](https://www.conventionalcommits.org/) in PR titles so changelog entries and semver bumps are correct.
|
|
213
|
+
|
|
214
|
+
PR titles must start with a type prefix, for example:
|
|
215
|
+
|
|
216
|
+
- `feat: add dependency graph fan-in check`
|
|
217
|
+
- `fix: remove unused import in loader tests`
|
|
218
|
+
- `docs: document fitness_functions.yaml merge order`
|
|
219
|
+
- `ci: add release-please workflow`
|
|
220
|
+
|
|
221
|
+
Supported types include `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, and `chore`. The PR title check in CI enforces this format.
|
tff_dbt-0.2.0/README.md
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# sqlmesh-ff
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/sqlmesh-ff/)
|
|
4
|
+
[](https://pypi.org/project/sqlmesh-ff/)
|
|
5
|
+
|
|
6
|
+
Configurable fitness functions plugin for [SQLMesh](https://sqlmesh.com) projects.
|
|
7
|
+
|
|
8
|
+
Ships SQLMesh linter rules (classification macros, SQL complexity, metadata, naming) and architectural checks (layer integrity, custom exclusions, schema contracts, dependency graph) with a unified Rich lint report.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
# Install from PyPI:
|
|
14
|
+
uv add sqlmesh-ff
|
|
15
|
+
|
|
16
|
+
# Or using pip:
|
|
17
|
+
pip install sqlmesh-ff
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick start
|
|
21
|
+
|
|
22
|
+
1. Add `fitness_functions.yaml` to your SQLMesh project root (see [Configuration](#configuration)).
|
|
23
|
+
2. Add a small `config.py` bootstrap (see [Where configuration lives](#where-configuration-lives)) — SQLMesh requires the loader as a Python class and cannot load `config.py` and `config.yaml` in the same folder.
|
|
24
|
+
3. Run lint:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
sqlmesh-ff lint
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Where configuration lives
|
|
31
|
+
|
|
32
|
+
Configuration is spread across the five layers listed in the table below. Only the YAML/JSON files in your project are user-editable settings.
|
|
33
|
+
|
|
34
|
+
| Layer | File | Role | You edit this? |
|
|
35
|
+
|-------|------|------|----------------|
|
|
36
|
+
| Plugin defaults | `sqlmesh_ff/config.py` (installed package) | Pydantic schema and built-in defaults (e.g. `fan_out_warn: 15`) | No — library code, never overwritten |
|
|
37
|
+
| SQLMesh project | `settings.yaml` | Gateways, `linter.rules`, variables, CI/CD bot | Yes — normal SQLMesh config |
|
|
38
|
+
| Fitness functions | `fitness_functions.yaml` | Thresholds, rule toggles, column naming/type rules, paths to JSON data | Yes — main FF config |
|
|
39
|
+
| Loader bootstrap | `config.py` (project root) | Loads `settings.yaml` and registers `FitnessLoader` | Rarely — ~15 lines of wiring |
|
|
40
|
+
| Contract data | `linter_contract_groups.json`, `linter_exclusions.json` | Repo-specific schema parity and dependency exclusions | Yes — project data |
|
|
41
|
+
|
|
42
|
+
**Merge order for fitness settings:** plugin defaults → `fitness_functions.yaml` → optional `loader_kwargs` overrides in `config.py`. Your YAML always wins over plugin defaults. The project `config.py` does not hold fitness thresholds — it only points at `fitness_functions.yaml`.
|
|
43
|
+
|
|
44
|
+
**Why `config.py` exists:** SQLMesh accepts `loader: FitnessLoader` only as a Python class, not as a YAML string. Because SQLMesh rejects having both `config.py` and `config.yaml` in one folder, projects use `settings.yaml` (SQLMesh settings) plus `config.py` (loader registration).
|
|
45
|
+
|
|
46
|
+
Example bootstrap:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
|
|
51
|
+
from sqlmesh.core.config import Config
|
|
52
|
+
from sqlmesh.utils.yaml import load as yaml_load
|
|
53
|
+
from sqlmesh_ff.loader import FitnessLoader
|
|
54
|
+
|
|
55
|
+
_settings = yaml_load(Path(__file__).parent / "settings.yaml")
|
|
56
|
+
config = Config.parse_obj(_settings).update_with({
|
|
57
|
+
"loader": FitnessLoader,
|
|
58
|
+
"loader_kwargs": {"fitness_functions_config": "fitness_functions.yaml"},
|
|
59
|
+
})
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Enable individual SQLMesh rules in `settings.yaml` under `linter.rules` / `linter.warn_rules`.
|
|
63
|
+
|
|
64
|
+
## Configuration
|
|
65
|
+
|
|
66
|
+
Fitness function settings live in `fitness_functions.yaml` at the project root. Override the file path or individual keys via `loader_kwargs` in `config.py` (advanced — most projects only set `fitness_functions_config`).
|
|
67
|
+
|
|
68
|
+
### Example `fitness_functions.yaml`
|
|
69
|
+
|
|
70
|
+
```yaml
|
|
71
|
+
contract_groups_path: linter_contract_groups.json
|
|
72
|
+
exclusions_path: linter_exclusions.json
|
|
73
|
+
|
|
74
|
+
layers:
|
|
75
|
+
order: [sources, derived, core, marts, export]
|
|
76
|
+
|
|
77
|
+
checks:
|
|
78
|
+
layer_integrity: { enabled: true }
|
|
79
|
+
custom_exclusions: { enabled: true }
|
|
80
|
+
schema_contracts: { enabled: true }
|
|
81
|
+
dependency_graph:
|
|
82
|
+
enabled: true
|
|
83
|
+
fan_out_warn: 15
|
|
84
|
+
fan_out_fail: 25
|
|
85
|
+
fan_in_warn: 10
|
|
86
|
+
|
|
87
|
+
rules:
|
|
88
|
+
classification_macros:
|
|
89
|
+
enabled: true
|
|
90
|
+
skip_layers: [sources]
|
|
91
|
+
columns:
|
|
92
|
+
product_type: "@product_type\\b|@PRODUCT_TYPE\\b"
|
|
93
|
+
sql_complexity:
|
|
94
|
+
enabled: true
|
|
95
|
+
thresholds:
|
|
96
|
+
decision_points: [15, 25]
|
|
97
|
+
cte_count: [8, 12]
|
|
98
|
+
join_count: [8, 12]
|
|
99
|
+
line_count: [250, 400]
|
|
100
|
+
mart_naming:
|
|
101
|
+
enabled: true
|
|
102
|
+
layer_name: marts
|
|
103
|
+
rule: prefix_with_subdirectory
|
|
104
|
+
column_names:
|
|
105
|
+
enabled: true
|
|
106
|
+
replacements: {}
|
|
107
|
+
column_types:
|
|
108
|
+
enabled: true
|
|
109
|
+
rules: []
|
|
110
|
+
equivalent_types:
|
|
111
|
+
text: [text, varchar]
|
|
112
|
+
metadata:
|
|
113
|
+
owner: true
|
|
114
|
+
description: true
|
|
115
|
+
grain: true
|
|
116
|
+
filename_equals_modelname:
|
|
117
|
+
enabled: true
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Project-specific JSON
|
|
121
|
+
|
|
122
|
+
Keep repo-specific contract and exclusion data in your project:
|
|
123
|
+
|
|
124
|
+
- `linter_contract_groups.json` — cross-model schema parity groups
|
|
125
|
+
- `linter_exclusions.json` — blocked dependency patterns and allowed exceptions
|
|
126
|
+
|
|
127
|
+
Reference their paths from `fitness_functions.yaml`. The plugin ships generic engines only; examples live in this README.
|
|
128
|
+
|
|
129
|
+
### Rule name mapping
|
|
130
|
+
|
|
131
|
+
SQLMesh uses lowercase class names in `linter.rules`:
|
|
132
|
+
|
|
133
|
+
| Config key | SQLMesh rule name |
|
|
134
|
+
|------------|-------------------|
|
|
135
|
+
| `classification_macros` | `classificationmacros` |
|
|
136
|
+
| `sql_complexity` | `sqlcomplexity` |
|
|
137
|
+
| `mart_naming` | `martmodelnamingconvention` |
|
|
138
|
+
| `column_names` | `columnnames` |
|
|
139
|
+
| `column_types` | `columntypes` |
|
|
140
|
+
| `metadata.owner` | `nomissingowner` |
|
|
141
|
+
| `metadata.description` | `nomissingdescription` |
|
|
142
|
+
| `metadata.grain` | `nomissinggrain` |
|
|
143
|
+
| `filename_equals_modelname` | `filenameequalsmodelname` |
|
|
144
|
+
|
|
145
|
+
## CLI
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
sqlmesh-ff lint [--project PATH] [--config PATH] [--checks CHECK,...] [--fail-level error|warning] [--group-by connascence|model]
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
- **Default:** all enabled checks plus SQLMesh linter rules
|
|
152
|
+
- **`--checks layer_integrity,custom_exclusions`:** run subset (for pre-push hooks)
|
|
153
|
+
- **`--fail-level warning`:** treat warnings as failures
|
|
154
|
+
- **`--group-by connascence|model`:** change how violations are grouped in the report (default: `connascence`)
|
|
155
|
+
|
|
156
|
+
## Integration example
|
|
157
|
+
|
|
158
|
+
Example overrides. `api_request` should always be named `api_call`. `_id` columns should always be of type `text` and `is_` columns should always be of type `boolean`.
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
column_names:
|
|
162
|
+
replacements:
|
|
163
|
+
api_request: api_call
|
|
164
|
+
column_types:
|
|
165
|
+
rules:
|
|
166
|
+
- name: id_is_text
|
|
167
|
+
pattern: "_id$"
|
|
168
|
+
data_type: text
|
|
169
|
+
- name: boolean
|
|
170
|
+
pattern: "^is_"
|
|
171
|
+
data_type: boolean
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
## Examples
|
|
175
|
+
|
|
176
|
+
A complete, runnable example project showcasing the configuration of `sqlmesh-ff` rules, exclusions, contracts, and a continuous integration workflow is located in the [examples/](examples/) directory.
|
|
177
|
+
|
|
178
|
+
To run the linter against the example project locally, run:
|
|
179
|
+
```bash
|
|
180
|
+
sqlmesh-ff lint --project examples/minimal-sqlmesh-project
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
See [examples/minimal-sqlmesh-project/fitness_functions.yaml](examples/minimal-sqlmesh-project/fitness_functions.yaml) to inspect the configured rules.
|
|
184
|
+
|
|
185
|
+
## Development
|
|
186
|
+
|
|
187
|
+
Initialize your local environment and configure the Git pre-push hook:
|
|
188
|
+
```bash
|
|
189
|
+
make init
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
Run linter, tests, or check diff coverage:
|
|
193
|
+
```bash
|
|
194
|
+
make lint
|
|
195
|
+
make test
|
|
196
|
+
make coverage
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Releases and PR titles
|
|
200
|
+
|
|
201
|
+
Releases are automated with [release-please](https://github.com/googleapis/release-please) on merges to `main`. Use [Conventional Commits](https://www.conventionalcommits.org/) in PR titles so changelog entries and semver bumps are correct.
|
|
202
|
+
|
|
203
|
+
PR titles must start with a type prefix, for example:
|
|
204
|
+
|
|
205
|
+
- `feat: add dependency graph fan-in check`
|
|
206
|
+
- `fix: remove unused import in loader tests`
|
|
207
|
+
- `docs: document fitness_functions.yaml merge order`
|
|
208
|
+
- `ci: add release-please workflow`
|
|
209
|
+
|
|
210
|
+
Supported types include `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, and `chore`. The PR title check in CI enforces this format.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "tff-dbt"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "dbt adapter for Transformation Fitness Functions (tff)"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Bart Schuijt", email = "schuijt.bart@gmail.com" }
|
|
10
|
+
]
|
|
11
|
+
dependencies = [
|
|
12
|
+
"tff-core",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.scripts]
|
|
16
|
+
tff-dbt = "tff.dbt.cli:main"
|
|
17
|
+
|
|
18
|
+
[tool.uv.sources]
|
|
19
|
+
tff-core = { workspace = true }
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["hatchling"]
|
|
23
|
+
build-backend = "hatchling.build"
|
|
24
|
+
|
|
25
|
+
[tool.hatch.build.targets.wheel]
|
|
26
|
+
packages = ["src/tff"]
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Command-line interface for tff-dbt."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from tff.core.config import load_fitness_config
|
|
11
|
+
from tff.core.context import set_ff_config
|
|
12
|
+
from tff.core.report import render_lint_report
|
|
13
|
+
from tff.dbt.runner import run_all_checks
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _parse_checks(value: str | None) -> list[str] | None:
|
|
17
|
+
if not value:
|
|
18
|
+
return None
|
|
19
|
+
return [part.strip() for part in value.split(",") if part.strip()]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main(argv: list[str] | None = None) -> int:
|
|
23
|
+
parser = argparse.ArgumentParser(
|
|
24
|
+
prog="tff-dbt",
|
|
25
|
+
description="Run dbt Transformation Fitness Function (tff) checks",
|
|
26
|
+
)
|
|
27
|
+
subparsers = parser.add_subparsers(dest="command", required=True)
|
|
28
|
+
|
|
29
|
+
lint_parser = subparsers.add_parser("lint", help="Run all enabled fitness checks")
|
|
30
|
+
lint_parser.add_argument(
|
|
31
|
+
"--project",
|
|
32
|
+
type=Path,
|
|
33
|
+
default=Path.cwd(),
|
|
34
|
+
help="dbt project root (default: current directory)",
|
|
35
|
+
)
|
|
36
|
+
lint_parser.add_argument(
|
|
37
|
+
"--config",
|
|
38
|
+
default="fitness_functions.yaml",
|
|
39
|
+
help="Path to fitness_functions.yaml (relative to project root)",
|
|
40
|
+
)
|
|
41
|
+
lint_parser.add_argument(
|
|
42
|
+
"--checks",
|
|
43
|
+
default=None,
|
|
44
|
+
help="Comma-separated checks to run (default: all enabled). "
|
|
45
|
+
"Use 'rules' for general linter rules only.",
|
|
46
|
+
)
|
|
47
|
+
lint_parser.add_argument(
|
|
48
|
+
"--fail-level",
|
|
49
|
+
choices=["error", "warning"],
|
|
50
|
+
default="error",
|
|
51
|
+
help="Exit non-zero when findings at or above this severity exist",
|
|
52
|
+
)
|
|
53
|
+
lint_parser.add_argument(
|
|
54
|
+
"--group-by",
|
|
55
|
+
choices=["connascence", "model"],
|
|
56
|
+
default="connascence",
|
|
57
|
+
help="How to group violations in the report (default: connascence)",
|
|
58
|
+
)
|
|
59
|
+
lint_parser.add_argument(
|
|
60
|
+
"--dialect",
|
|
61
|
+
default="bigquery",
|
|
62
|
+
help="SQL dialect of the dbt models (default: bigquery)",
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
args = parser.parse_args(argv)
|
|
66
|
+
|
|
67
|
+
if args.command == "lint":
|
|
68
|
+
logging.basicConfig(level=logging.ERROR)
|
|
69
|
+
project_root = args.project.resolve()
|
|
70
|
+
config = load_fitness_config(
|
|
71
|
+
project_root,
|
|
72
|
+
config_path=args.config,
|
|
73
|
+
)
|
|
74
|
+
set_ff_config(config)
|
|
75
|
+
checks = _parse_checks(args.checks)
|
|
76
|
+
|
|
77
|
+
findings, models_checked, executed_checks = run_all_checks(
|
|
78
|
+
project_root=project_root,
|
|
79
|
+
config=config,
|
|
80
|
+
checks=checks,
|
|
81
|
+
dialect=args.dialect,
|
|
82
|
+
)
|
|
83
|
+
passed = render_lint_report(
|
|
84
|
+
findings,
|
|
85
|
+
models_checked=models_checked,
|
|
86
|
+
executed_checks=executed_checks,
|
|
87
|
+
fail_level=args.fail_level, # type: ignore[arg-type]
|
|
88
|
+
group_by=args.group_by, # type: ignore[arg-type]
|
|
89
|
+
)
|
|
90
|
+
return 0 if passed else 1
|
|
91
|
+
|
|
92
|
+
return 1
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
sys.exit(main())
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from tff.core.model import ModelRepresentation
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _normalize_grains(grains_raw: object) -> list[str]:
    """Coerce a raw ``grain``/``grains`` meta value into a list of strings.

    A single string becomes a one-element list, a list has every item
    stringified, and any other value (including ``None``) yields ``[]``.
    """
    if isinstance(grains_raw, str):
        return [grains_raw]
    if isinstance(grains_raw, list):
        return [str(grain) for grain in grains_raw]
    return []


def load_dbt_models(
    project_root: Path,
    target_dir: str = "target",
    dialect: str = "bigquery",
) -> dict[str, ModelRepresentation]:
    """Build ``ModelRepresentation`` objects from a dbt ``manifest.json``.

    Models and seeds are mapped with their columns, metadata, dependencies
    and attached tests; sources are mapped as external, symbolic entries so
    graph checks can resolve them.

    Args:
        project_root: dbt project root; the manifest is read from
            ``project_root / target_dir / "manifest.json"``.
        target_dir: Directory (relative to *project_root*) holding the manifest.
        dialect: SQL dialect recorded on every returned representation.

    Returns:
        Mapping of manifest unique ID to ``ModelRepresentation``.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
    """
    manifest_path = project_root / target_dir / "manifest.json"
    if not manifest_path.exists():
        raise FileNotFoundError(
            f"dbt manifest not found at {manifest_path}. Please run 'dbt compile' first."
        )

    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    nodes = manifest.get("nodes", {})

    # 1. Collect tests by the unique ID of every model/seed they depend on.
    model_tests: dict[str, list[tuple[str, dict]]] = {}
    for node in nodes.values():
        if node.get("resource_type") != "test":
            continue

        # Tests without a ``test_metadata`` name are skipped.
        test_metadata = node.get("test_metadata") or {}
        test_name = test_metadata.get("name")
        if not test_name:
            continue

        test_kwargs = test_metadata.get("kwargs", {})
        for dep in node.get("depends_on", {}).get("nodes", []):
            if dep.startswith(("model.", "seed.")):
                model_tests.setdefault(dep, []).append((test_name, test_kwargs))

    # 2. Map nodes of type 'model' and 'seed' to ModelRepresentation.
    mapped_models: dict[str, ModelRepresentation] = {}
    for unique_id, node in nodes.items():
        if node.get("resource_type") not in ("model", "seed"):
            continue

        # Column names and types are lower-cased; a missing type is "unknown".
        columns_to_types = {
            col_name.lower(): (col_meta.get("data_type") or "unknown").lower()
            for col_name, col_meta in node.get("columns", {}).items()
        }

        # Metadata is read from the node-level ``meta`` first and falls back
        # to ``config.meta`` for owner and grain alike. Null entries are
        # treated as empty mappings.
        meta = node.get("meta") or {}
        config = node.get("config") or {}
        config_meta = config.get("meta") or {}

        owner = meta.get("owner") or config_meta.get("owner")
        grains = _normalize_grains(
            meta.get("grain")
            or meta.get("grains")
            or config_meta.get("grain")
            or config_meta.get("grains")
        )

        # Keep only dependencies on models, seeds and sources.
        depends_on = {
            dep
            for dep in node.get("depends_on", {}).get("nodes", [])
            if dep.startswith(("model.", "seed.", "source."))
        }

        mapped_models[unique_id] = ModelRepresentation(
            name=node.get("name", ""),
            path=str(project_root / node.get("original_file_path", "")),
            dialect=dialect,
            # Ephemeral models behave like symbolic models.
            is_symbolic=config.get("materialized") == "ephemeral",
            is_external=False,
            columns_to_types=columns_to_types,
            depends_on=depends_on,
            description=node.get("description"),
            owner=owner,
            grains=grains,
            audits=model_tests.get(unique_id, []),
        )

    # 3. Map sources to ModelRepresentation so graph checks resolve them.
    for source_id, source in manifest.get("sources", {}).items():
        mapped_models[source_id] = ModelRepresentation(
            name=source.get("name", ""),
            path=str(project_root / source.get("original_file_path", "")),
            dialect=dialect,
            is_symbolic=True,
            is_external=True,
            columns_to_types={},
            depends_on=set(),
            description=source.get("description"),
            owner=(source.get("meta") or {}).get("owner"),
            grains=[],
            audits=[],
        )

    return mapped_models
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""Orchestrator runner executing tff-core rules and checks against dbt manifest models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from tff.core.checks.custom_exclusions import collect_custom_exclusion_findings
|
|
9
|
+
from tff.core.checks.dependency_graph import collect_dependency_graph_findings
|
|
10
|
+
from tff.core.checks.layer_integrity import collect_layer_integrity_findings
|
|
11
|
+
from tff.core.checks.schema_contracts import collect_schema_contract_findings
|
|
12
|
+
from tff.core.config import FitnessFunctionsConfig, load_fitness_config
|
|
13
|
+
from tff.core.context import set_ff_config
|
|
14
|
+
from tff.core.report import LintFinding
|
|
15
|
+
from tff.core.rules import ALL_RULES
|
|
16
|
+
from tff.core.utils.paths import model_path_relative
|
|
17
|
+
from tff.core.model import ModelRepresentation
|
|
18
|
+
from tff.dbt.manifest import load_dbt_models
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
CHECK_COLLECTORS = {
|
|
23
|
+
"layer_integrity": lambda models, cfg: collect_layer_integrity_findings(models, cfg),
|
|
24
|
+
"custom_exclusions": lambda models, cfg: collect_custom_exclusion_findings(models, cfg),
|
|
25
|
+
"schema_contracts": lambda _models, cfg: collect_schema_contract_findings(cfg),
|
|
26
|
+
"dependency_graph": lambda models, cfg: collect_dependency_graph_findings(models, cfg),
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def collect_dbt_rules_findings(models: dict[str, ModelRepresentation]) -> list[LintFinding]:
    """Run every rule in ``ALL_RULES`` against the given models.

    Models flagged as external or symbolic are skipped. For each remaining
    model, every rule's ``check_model`` result is inspected; a truthy result
    contributes one ``LintFinding`` (severity ``"error"``) per violation
    message.

    Args:
        models: Mapping whose values are the models to check.

    Returns:
        The findings, in model order, then rule order, then message order.
    """
    active_rules = [rule_cls() for rule_cls in ALL_RULES]
    collected = []

    for candidate in models.values():
        if candidate.is_external or candidate.is_symbolic:
            continue

        for rule in active_rules:
            outcome = rule.check_model(candidate)
            if not outcome:
                continue

            # A violation may carry a single message or a collection of them.
            raw = outcome.violation_msg
            messages = [raw] if isinstance(raw, str) else raw

            collected.extend(
                LintFinding(
                    check=rule.name,
                    severity="error",
                    model=candidate.name,
                    path=model_path_relative(candidate),
                    # The finding already names the model, so drop a
                    # "<model name>: " prefix when the rule prepended one.
                    message=text.removeprefix(f"{candidate.name}: "),
                )
                for text in messages
            )

    return collected
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _check_enabled(config: FitnessFunctionsConfig, check_name: str) -> bool:
    """Report whether the check called ``check_name`` is switched on.

    Looks up ``check_name`` as an attribute of ``config.checks`` and returns
    the truthiness of that entry's ``enabled`` attribute. A missing entry or
    a missing ``enabled`` attribute counts as disabled.
    """
    section = getattr(config.checks, check_name, None)
    if section is None:
        return False
    return bool(getattr(section, "enabled", False))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def run_all_checks(
    project_root: Path | None = None,
    config: FitnessFunctionsConfig | None = None,
    checks: list[str] | None = None,
    dialect: str = "bigquery",
) -> tuple[list[LintFinding], int, list[str]]:
    """Run the selected rules and checks against a dbt project's models.

    Args:
        project_root: Project directory passed to the config and model
            loaders. Defaults to the current working directory.
        config: Configuration to use. When ``None`` it is loaded with
            ``load_fitness_config(project_root)``.
        checks: Names to run: ``"rules"`` and/or keys of
            ``CHECK_COLLECTORS``. When ``None``, ``"rules"`` is run together
            with every collector whose check is enabled in ``config``.
            Names that match neither are ignored, with a logged warning.
        dialect: Forwarded to ``load_dbt_models``.

    Returns:
        A ``(findings, models_checked, selected)`` tuple: the collected
        findings, the number of models that are neither external nor
        symbolic, and the check names that were selected (the ``checks``
        argument itself when one was given).
    """
    project_root = project_root or Path.cwd()
    if config is None:
        config = load_fitness_config(project_root)
    set_ff_config(config)

    # Parse and load manifest.json
    models = load_dbt_models(project_root, dialect=dialect)

    if checks is None:
        selected = ["rules"] + [
            name
            for name in CHECK_COLLECTORS
            if _check_enabled(config, name)
        ]
    else:
        selected = checks
        # A misspelled name would otherwise produce an empty, successful run
        # with no hint that nothing was executed for it.
        unknown = [
            name
            for name in selected
            if name != "rules" and name not in CHECK_COLLECTORS
        ]
        if unknown:
            logger.warning(
                "Ignoring unknown check name(s): %s",
                ", ".join(map(str, unknown)),
            )

    findings: list[LintFinding] = []

    if "rules" in selected:
        findings.extend(collect_dbt_rules_findings(models))

    # Collectors run in registry order, regardless of the order in `selected`.
    for check_name, collector in CHECK_COLLECTORS.items():
        if check_name not in selected:
            continue
        findings.extend(collector(models, config))

    models_checked = sum(
        1 for m in models.values() if not m.is_external and not m.is_symbolic
    )

    return findings, models_checked, selected
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from tff.dbt.manifest import load_dbt_models
|
|
5
|
+
from tff.dbt.runner import run_all_checks
|
|
6
|
+
from tff.core.config import FitnessFunctionsConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_load_dbt_models(tmp_path: Path):
    """Load a hand-written manifest and check how its entries are mapped.

    The fixture holds two models, two tests attached to the first model and
    one source. The assertions pin the expected name, column types, owner,
    description, dependencies, audits and grains of the loaded entries.
    """
    stg_users_node = {
        "resource_type": "model",
        "name": "stg_users",
        "original_file_path": "models/staging/stg_users.sql",
        "columns": {
            "id": {"data_type": "INT"},
            "name": {"data_type": "VARCHAR"},
        },
        "config": {
            "materialized": "view",
        },
        "meta": {
            "owner": "data-team",
            "grain": "user_id",  # grain given as a plain string
        },
        "description": "Staging table for users",
        "depends_on": {
            "nodes": ["source.my_project.raw_users"]
        },
    }
    invalid_grain_node = {
        "resource_type": "model",
        "name": "invalid_grain",
        "original_file_path": "models/staging/invalid_grain.sql",
        "columns": {},
        "meta": {
            "grain": 123,  # neither a list nor a string
        },
        "depends_on": {"nodes": []},
    }
    not_null_test_node = {
        "resource_type": "test",
        "name": "not_null_stg_users_id",
        "test_metadata": {
            "name": "not_null",
            "kwargs": {"column_name": "id"},
        },
        "depends_on": {
            "nodes": ["model.my_project.stg_users"]
        },
    }
    unnamed_test_node = {
        "resource_type": "test",
        "name": "no_name_test",
        "test_metadata": {},  # no "name" entry
        "depends_on": {
            "nodes": ["model.my_project.stg_users"]
        },
    }
    raw_users_source = {
        "resource_type": "source",
        "name": "raw_users",
        "original_file_path": "models/sources/raw_users.yml",
        "description": "Raw users source table",
        "meta": {"owner": "ingest-team"},
    }
    manifest = {
        "nodes": {
            "model.my_project.stg_users": stg_users_node,
            "model.my_project.invalid_grain": invalid_grain_node,
            "test.my_project.not_null_stg_users_id": not_null_test_node,
            "test.my_project.no_name_test": unnamed_test_node,
        },
        "sources": {
            "source.my_project.raw_users": raw_users_source,
        },
    }

    manifest_path = tmp_path / "target" / "manifest.json"
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.write_text(json.dumps(manifest), encoding="utf-8")

    loaded = load_dbt_models(tmp_path)
    assert "model.my_project.stg_users" in loaded
    assert "source.my_project.raw_users" in loaded

    stg_users = loaded["model.my_project.stg_users"]
    assert stg_users.name == "stg_users"
    assert stg_users.columns_to_types == {"id": "int", "name": "varchar"}
    assert stg_users.owner == "data-team"
    assert stg_users.description == "Staging table for users"
    assert stg_users.depends_on == {"source.my_project.raw_users"}
    # Only the test that names its test_metadata is expected as an audit.
    assert stg_users.audits == [("not_null", {"column_name": "id"})]
    assert stg_users.grains == ["user_id"]

    invalid_grain = loaded["model.my_project.invalid_grain"]
    assert invalid_grain.grains == []

    raw_users = loaded["source.my_project.raw_users"]
    assert raw_users.name == "raw_users"
    assert raw_users.is_external is True
    assert raw_users.owner == "ingest-team"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_load_dbt_models_missing_manifest():
    """Expect ``FileNotFoundError`` when loading from ``/non_existent_path``."""
    import pytest

    missing_root = Path("/non_existent_path")
    with pytest.raises(FileNotFoundError):
        load_dbt_models(missing_root)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_run_all_checks(tmp_path: Path):
    """Exercise ``run_all_checks`` in three ways.

    1. With an explicitly passed config.
    2. With ``config=None`` and a ``fitness_functions.yaml`` file placed in
       the project root.
    3. With an explicit ``checks`` list.
    """
    target_dir = tmp_path / "target"
    target_dir.mkdir(parents=True, exist_ok=True)
    manifest_file = target_dir / "manifest.json"

    # Manifest with one regular model and one ephemeral model; no sources
    # are declared.
    manifest_data = {
        "nodes": {
            "model.my_project.stg_users": {
                "resource_type": "model",
                "name": "stg_users",
                "original_file_path": "models/staging/stg_users.sql",
                "columns": {
                    "id": {"data_type": "INT"},
                },
                "config": {},
                "meta": {"owner": "data-team"},
                "depends_on": {"nodes": []},
            },
            "model.my_project.symbolic_model": {
                "resource_type": "model",
                "name": "symbolic_model",
                "original_file_path": "models/staging/symbolic.sql",
                "columns": {},
                # Ephemeral: expected to be treated as symbolic and skipped.
                "config": {"materialized": "ephemeral"},
                "meta": {},
                "depends_on": {"nodes": []},
            },
        },
        "sources": {},
    }
    manifest_file.write_text(json.dumps(manifest_data), encoding="utf-8")

    # SQL file for the regular model
    sql_file = tmp_path / "models/staging/stg_users.sql"
    sql_file.parent.mkdir(parents=True, exist_ok=True)
    sql_file.write_text("SELECT id FROM raw", encoding="utf-8")

    # 1. Config passed explicitly
    config = FitnessFunctionsConfig()
    config.rules.metadata.enabled = True
    config.rules.metadata.owner = True
    config.rules.metadata.description = True  # stg_users has no description

    findings, models_checked, _ = run_all_checks(
        project_root=tmp_path,
        config=config,
    )
    assert models_checked == 1  # the ephemeral model is not counted
    assert len(findings) > 0
    assert any("description" in f.check for f in findings)

    # 2. config=None: the config is loaded from the project root.
    # The nesting is spelled out line by line so that `enabled` and
    # `description` sit under `rules.metadata`, mirroring step 1.
    yaml_file = tmp_path / "fitness_functions.yaml"
    yaml_file.write_text(
        "rules:\n"
        "  metadata:\n"
        "    enabled: true\n"
        "    description: true\n",
        encoding="utf-8",
    )
    findings_auto, _, _ = run_all_checks(
        project_root=tmp_path,
        config=None,
    )
    assert len(findings_auto) > 0

    # 3. Explicit checks list
    findings_subset, _, selected_subset = run_all_checks(
        project_root=tmp_path,
        config=config,
        checks=["rules"],
    )
    assert selected_subset == ["rules"]
    assert len(findings_subset) > 0
|
|
176
|
+
|