tff-dbt 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ dist/
7
+ build/
8
+ .cache/
9
+ .coverage
10
+ coverage.xml
tff_dbt-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Bart Schuijt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
tff_dbt-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,221 @@
1
+ Metadata-Version: 2.4
2
+ Name: tff-dbt
3
+ Version: 0.2.0
4
+ Summary: dbt adapter for Transformation Fitness Functions (tff)
5
+ Author-email: Bart Schuijt <schuijt.bart@gmail.com>
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.12
9
+ Requires-Dist: tff-core
10
+ Description-Content-Type: text/markdown
11
+
12
+ # sqlmesh-ff
13
+
14
+ [![PyPI version](https://img.shields.io/pypi/v/sqlmesh-ff.svg)](https://pypi.org/project/sqlmesh-ff/)
15
+ [![Python versions](https://img.shields.io/pypi/pyversions/sqlmesh-ff.svg)](https://pypi.org/project/sqlmesh-ff/)
16
+
17
+ Configurable fitness functions plugin for [SQLMesh](https://sqlmesh.com) projects.
18
+
19
+ Ships SQLMesh linter rules (classification macros, SQL complexity, metadata, naming) and architectural checks (layer integrity, custom exclusions, schema contracts, dependency graph) with a unified Rich lint report.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ # Install from PyPI:
25
+ uv add sqlmesh-ff
26
+
27
+ # Or using pip:
28
+ pip install sqlmesh-ff
29
+ ```
30
+
31
+ ## Quick start
32
+
33
+ 1. Add `fitness_functions.yaml` to your SQLMesh project root (see [Configuration](#configuration)).
34
+ 2. Add a small `config.py` bootstrap (see [Where configuration lives](#where-configuration-lives)) — SQLMesh requires the loader as a Python class and cannot load `config.py` and `config.yaml` in the same folder.
35
+ 3. Run lint:
36
+
37
+ ```bash
38
+ sqlmesh-ff lint
39
+ ```
40
+
41
+ ## Where configuration lives
42
+
43
+ Configuration is spread across the five places listed below. Only the YAML/JSON files in your project are user-editable settings.
44
+
45
+ | Layer | File | Role | You edit this? |
46
+ |-------|------|------|----------------|
47
+ | Plugin defaults | `sqlmesh_ff/config.py` (installed package) | Pydantic schema and built-in defaults (e.g. `fan_out_warn: 15`) | No — library code, never overwritten |
48
+ | SQLMesh project | `settings.yaml` | Gateways, `linter.rules`, variables, CI/CD bot | Yes — normal SQLMesh config |
49
+ | Fitness functions | `fitness_functions.yaml` | Thresholds, rule toggles, column naming/type rules, paths to JSON data | Yes — main FF config |
50
+ | Loader bootstrap | `config.py` (project root) | Loads `settings.yaml` and registers `FitnessLoader` | Rarely — ~15 lines of wiring |
51
+ | Contract data | `linter_contract_groups.json`, `linter_exclusions.json` | Repo-specific schema parity and dependency exclusions | Yes — project data |
52
+
53
+ **Merge order for fitness settings:** plugin defaults → `fitness_functions.yaml` → optional `loader_kwargs` overrides in `config.py`. Your YAML always wins over plugin defaults. The project `config.py` does not hold fitness thresholds — it only points at `fitness_functions.yaml`.
54
+
55
+ **Why `config.py` exists:** SQLMesh accepts `loader: FitnessLoader` only as a Python class, not as a YAML string. Because SQLMesh rejects having both `config.py` and `config.yaml` in one folder, projects use `settings.yaml` (SQLMesh settings) plus `config.py` (loader registration).
56
+
57
+ Example bootstrap:
58
+
59
+ ```python
60
+ from pathlib import Path
61
+
62
+ from sqlmesh.core.config import Config
63
+ from sqlmesh.utils.yaml import load as yaml_load
64
+ from sqlmesh_ff.loader import FitnessLoader
65
+
66
+ _settings = yaml_load(Path(__file__).parent / "settings.yaml")
67
+ config = Config.parse_obj(_settings).update_with({
68
+ "loader": FitnessLoader,
69
+ "loader_kwargs": {"fitness_functions_config": "fitness_functions.yaml"},
70
+ })
71
+ ```
72
+
73
+ Enable individual SQLMesh rules in `settings.yaml` under `linter.rules` / `linter.warn_rules`.
74
+
75
+ ## Configuration
76
+
77
+ Fitness function settings live in `fitness_functions.yaml` at the project root. Override the file path or individual keys via `loader_kwargs` in `config.py` (advanced — most projects only set `fitness_functions_config`).
78
+
79
+ ### Example `fitness_functions.yaml`
80
+
81
+ ```yaml
82
+ contract_groups_path: linter_contract_groups.json
83
+ exclusions_path: linter_exclusions.json
84
+
85
+ layers:
86
+ order: [sources, derived, core, marts, export]
87
+
88
+ checks:
89
+ layer_integrity: { enabled: true }
90
+ custom_exclusions: { enabled: true }
91
+ schema_contracts: { enabled: true }
92
+ dependency_graph:
93
+ enabled: true
94
+ fan_out_warn: 15
95
+ fan_out_fail: 25
96
+ fan_in_warn: 10
97
+
98
+ rules:
99
+ classification_macros:
100
+ enabled: true
101
+ skip_layers: [sources]
102
+ columns:
103
+ product_type: "@product_type\\b|@PRODUCT_TYPE\\b"
104
+ sql_complexity:
105
+ enabled: true
106
+ thresholds:
107
+ decision_points: [15, 25]
108
+ cte_count: [8, 12]
109
+ join_count: [8, 12]
110
+ line_count: [250, 400]
111
+ mart_naming:
112
+ enabled: true
113
+ layer_name: marts
114
+ rule: prefix_with_subdirectory
115
+ column_names:
116
+ enabled: true
117
+ replacements: {}
118
+ column_types:
119
+ enabled: true
120
+ rules: []
121
+ equivalent_types:
122
+ text: [text, varchar]
123
+ metadata:
124
+ owner: true
125
+ description: true
126
+ grain: true
127
+ filename_equals_modelname:
128
+ enabled: true
129
+ ```
130
+
131
+ ### Project-specific JSON
132
+
133
+ Keep repo-specific contract and exclusion data in your project:
134
+
135
+ - `linter_contract_groups.json` — cross-model schema parity groups
136
+ - `linter_exclusions.json` — blocked dependency patterns and allowed exceptions
137
+
138
+ Reference their paths from `fitness_functions.yaml`. The plugin ships generic engines only; examples live in this README.
139
+
140
+ ### Rule name mapping
141
+
142
+ SQLMesh uses lowercase class names in `linter.rules`:
143
+
144
+ | Config key | SQLMesh rule name |
145
+ |------------|-------------------|
146
+ | `classification_macros` | `classificationmacros` |
147
+ | `sql_complexity` | `sqlcomplexity` |
148
+ | `mart_naming` | `martmodelnamingconvention` |
149
+ | `column_names` | `columnnames` |
150
+ | `column_types` | `columntypes` |
151
+ | `metadata.owner` | `nomissingowner` |
152
+ | `metadata.description` | `nomissingdescription` |
153
+ | `metadata.grain` | `nomissinggrain` |
154
+ | `filename_equals_modelname` | `filenameequalsmodelname` |
155
+
156
+ ## CLI
157
+
158
+ ```
159
+ sqlmesh-ff lint [--project PATH] [--config PATH] [--checks CHECK,...] [--fail-level error|warning] [--group-by connascence|model]
160
+ ```
161
+
162
+ - **Default:** all enabled checks plus SQLMesh linter rules
163
+ - **`--checks layer_integrity,custom_exclusions`:** run subset (for pre-push hooks)
164
+ - **`--fail-level warning`:** treat warnings as failures
165
+ - **`--group-by connascence|model`:** change how violations are grouped in the report (default: `connascence`)
166
+
167
+ ## Integration example
168
+
169
+ Example overrides. `api_request` should always be named `api_call`. `_id` columns should always be of type `text` and `is_` columns should always be of type `boolean`.
170
+
171
+ ```yaml
172
+ column_names:
173
+ replacements:
174
+ api_request: api_call
175
+ column_types:
176
+ rules:
177
+ - name: id_is_text
178
+ pattern: "_id$"
179
+ data_type: text
180
+ - name: boolean
181
+ pattern: "^is_"
182
+ data_type: boolean
183
+ ```
184
+
185
+ ## Examples
186
+
187
+ A complete, runnable example project showcasing the configuration of `sqlmesh-ff` rules, exclusions, contracts, and a continuous integration workflow is located in the [examples/](examples/) directory.
188
+
189
+ To run the linter against the example project locally, run:
190
+ ```bash
191
+ sqlmesh-ff lint --project examples/minimal-sqlmesh-project
192
+ ```
193
+
194
+ See [examples/minimal-sqlmesh-project/fitness_functions.yaml](examples/minimal-sqlmesh-project/fitness_functions.yaml) to inspect the configured rules.
195
+
196
+ ## Development
197
+
198
+ Initialize your local environment and configure the Git pre-push hook:
199
+ ```bash
200
+ make init
201
+ ```
202
+
203
+ Run linter, tests, or check diff coverage:
204
+ ```bash
205
+ make lint
206
+ make test
207
+ make coverage
208
+ ```
209
+
210
+ ### Releases and PR titles
211
+
212
+ Releases are automated with [release-please](https://github.com/googleapis/release-please) on merges to `main`. Use [Conventional Commits](https://www.conventionalcommits.org/) in PR titles so changelog entries and semver bumps are correct.
213
+
214
+ PR titles must start with a type prefix, for example:
215
+
216
+ - `feat: add dependency graph fan-in check`
217
+ - `fix: remove unused import in loader tests`
218
+ - `docs: document fitness_functions.yaml merge order`
219
+ - `ci: add release-please workflow`
220
+
221
+ Supported types include `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, and `chore`. The PR title check in CI enforces this format.
@@ -0,0 +1,210 @@
1
+ # sqlmesh-ff
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/sqlmesh-ff.svg)](https://pypi.org/project/sqlmesh-ff/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/sqlmesh-ff.svg)](https://pypi.org/project/sqlmesh-ff/)
5
+
6
+ Configurable fitness functions plugin for [SQLMesh](https://sqlmesh.com) projects.
7
+
8
+ Ships SQLMesh linter rules (classification macros, SQL complexity, metadata, naming) and architectural checks (layer integrity, custom exclusions, schema contracts, dependency graph) with a unified Rich lint report.
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ # Install from PyPI:
14
+ uv add sqlmesh-ff
15
+
16
+ # Or using pip:
17
+ pip install sqlmesh-ff
18
+ ```
19
+
20
+ ## Quick start
21
+
22
+ 1. Add `fitness_functions.yaml` to your SQLMesh project root (see [Configuration](#configuration)).
23
+ 2. Add a small `config.py` bootstrap (see [Where configuration lives](#where-configuration-lives)) — SQLMesh requires the loader as a Python class and cannot load `config.py` and `config.yaml` in the same folder.
24
+ 3. Run lint:
25
+
26
+ ```bash
27
+ sqlmesh-ff lint
28
+ ```
29
+
30
+ ## Where configuration lives
31
+
32
+ Configuration is spread across the five places listed below. Only the YAML/JSON files in your project are user-editable settings.
33
+
34
+ | Layer | File | Role | You edit this? |
35
+ |-------|------|------|----------------|
36
+ | Plugin defaults | `sqlmesh_ff/config.py` (installed package) | Pydantic schema and built-in defaults (e.g. `fan_out_warn: 15`) | No — library code, never overwritten |
37
+ | SQLMesh project | `settings.yaml` | Gateways, `linter.rules`, variables, CI/CD bot | Yes — normal SQLMesh config |
38
+ | Fitness functions | `fitness_functions.yaml` | Thresholds, rule toggles, column naming/type rules, paths to JSON data | Yes — main FF config |
39
+ | Loader bootstrap | `config.py` (project root) | Loads `settings.yaml` and registers `FitnessLoader` | Rarely — ~15 lines of wiring |
40
+ | Contract data | `linter_contract_groups.json`, `linter_exclusions.json` | Repo-specific schema parity and dependency exclusions | Yes — project data |
41
+
42
+ **Merge order for fitness settings:** plugin defaults → `fitness_functions.yaml` → optional `loader_kwargs` overrides in `config.py`. Your YAML always wins over plugin defaults. The project `config.py` does not hold fitness thresholds — it only points at `fitness_functions.yaml`.
43
+
44
+ **Why `config.py` exists:** SQLMesh accepts `loader: FitnessLoader` only as a Python class, not as a YAML string. Because SQLMesh rejects having both `config.py` and `config.yaml` in one folder, projects use `settings.yaml` (SQLMesh settings) plus `config.py` (loader registration).
45
+
46
+ Example bootstrap:
47
+
48
+ ```python
49
+ from pathlib import Path
50
+
51
+ from sqlmesh.core.config import Config
52
+ from sqlmesh.utils.yaml import load as yaml_load
53
+ from sqlmesh_ff.loader import FitnessLoader
54
+
55
+ _settings = yaml_load(Path(__file__).parent / "settings.yaml")
56
+ config = Config.parse_obj(_settings).update_with({
57
+ "loader": FitnessLoader,
58
+ "loader_kwargs": {"fitness_functions_config": "fitness_functions.yaml"},
59
+ })
60
+ ```
61
+
62
+ Enable individual SQLMesh rules in `settings.yaml` under `linter.rules` / `linter.warn_rules`.
63
+
64
+ ## Configuration
65
+
66
+ Fitness function settings live in `fitness_functions.yaml` at the project root. Override the file path or individual keys via `loader_kwargs` in `config.py` (advanced — most projects only set `fitness_functions_config`).
67
+
68
+ ### Example `fitness_functions.yaml`
69
+
70
+ ```yaml
71
+ contract_groups_path: linter_contract_groups.json
72
+ exclusions_path: linter_exclusions.json
73
+
74
+ layers:
75
+ order: [sources, derived, core, marts, export]
76
+
77
+ checks:
78
+ layer_integrity: { enabled: true }
79
+ custom_exclusions: { enabled: true }
80
+ schema_contracts: { enabled: true }
81
+ dependency_graph:
82
+ enabled: true
83
+ fan_out_warn: 15
84
+ fan_out_fail: 25
85
+ fan_in_warn: 10
86
+
87
+ rules:
88
+ classification_macros:
89
+ enabled: true
90
+ skip_layers: [sources]
91
+ columns:
92
+ product_type: "@product_type\\b|@PRODUCT_TYPE\\b"
93
+ sql_complexity:
94
+ enabled: true
95
+ thresholds:
96
+ decision_points: [15, 25]
97
+ cte_count: [8, 12]
98
+ join_count: [8, 12]
99
+ line_count: [250, 400]
100
+ mart_naming:
101
+ enabled: true
102
+ layer_name: marts
103
+ rule: prefix_with_subdirectory
104
+ column_names:
105
+ enabled: true
106
+ replacements: {}
107
+ column_types:
108
+ enabled: true
109
+ rules: []
110
+ equivalent_types:
111
+ text: [text, varchar]
112
+ metadata:
113
+ owner: true
114
+ description: true
115
+ grain: true
116
+ filename_equals_modelname:
117
+ enabled: true
118
+ ```
119
+
120
+ ### Project-specific JSON
121
+
122
+ Keep repo-specific contract and exclusion data in your project:
123
+
124
+ - `linter_contract_groups.json` — cross-model schema parity groups
125
+ - `linter_exclusions.json` — blocked dependency patterns and allowed exceptions
126
+
127
+ Reference their paths from `fitness_functions.yaml`. The plugin ships generic engines only; examples live in this README.
128
+
129
+ ### Rule name mapping
130
+
131
+ SQLMesh uses lowercase class names in `linter.rules`:
132
+
133
+ | Config key | SQLMesh rule name |
134
+ |------------|-------------------|
135
+ | `classification_macros` | `classificationmacros` |
136
+ | `sql_complexity` | `sqlcomplexity` |
137
+ | `mart_naming` | `martmodelnamingconvention` |
138
+ | `column_names` | `columnnames` |
139
+ | `column_types` | `columntypes` |
140
+ | `metadata.owner` | `nomissingowner` |
141
+ | `metadata.description` | `nomissingdescription` |
142
+ | `metadata.grain` | `nomissinggrain` |
143
+ | `filename_equals_modelname` | `filenameequalsmodelname` |
144
+
145
+ ## CLI
146
+
147
+ ```
148
+ sqlmesh-ff lint [--project PATH] [--config PATH] [--checks CHECK,...] [--fail-level error|warning] [--group-by connascence|model]
149
+ ```
150
+
151
+ - **Default:** all enabled checks plus SQLMesh linter rules
152
+ - **`--checks layer_integrity,custom_exclusions`:** run subset (for pre-push hooks)
153
+ - **`--fail-level warning`:** treat warnings as failures
154
+ - **`--group-by connascence|model`:** change how violations are grouped in the report (default: `connascence`)
155
+
156
+ ## Integration example
157
+
158
+ Example overrides. `api_request` should always be named `api_call`. `_id` columns should always be of type `text` and `is_` columns should always be of type `boolean`.
159
+
160
+ ```yaml
161
+ column_names:
162
+ replacements:
163
+ api_request: api_call
164
+ column_types:
165
+ rules:
166
+ - name: id_is_text
167
+ pattern: "_id$"
168
+ data_type: text
169
+ - name: boolean
170
+ pattern: "^is_"
171
+ data_type: boolean
172
+ ```
173
+
174
+ ## Examples
175
+
176
+ A complete, runnable example project showcasing the configuration of `sqlmesh-ff` rules, exclusions, contracts, and a continuous integration workflow is located in the [examples/](examples/) directory.
177
+
178
+ To run the linter against the example project locally, run:
179
+ ```bash
180
+ sqlmesh-ff lint --project examples/minimal-sqlmesh-project
181
+ ```
182
+
183
+ See [examples/minimal-sqlmesh-project/fitness_functions.yaml](examples/minimal-sqlmesh-project/fitness_functions.yaml) to inspect the configured rules.
184
+
185
+ ## Development
186
+
187
+ Initialize your local environment and configure the Git pre-push hook:
188
+ ```bash
189
+ make init
190
+ ```
191
+
192
+ Run linter, tests, or check diff coverage:
193
+ ```bash
194
+ make lint
195
+ make test
196
+ make coverage
197
+ ```
198
+
199
+ ### Releases and PR titles
200
+
201
+ Releases are automated with [release-please](https://github.com/googleapis/release-please) on merges to `main`. Use [Conventional Commits](https://www.conventionalcommits.org/) in PR titles so changelog entries and semver bumps are correct.
202
+
203
+ PR titles must start with a type prefix, for example:
204
+
205
+ - `feat: add dependency graph fan-in check`
206
+ - `fix: remove unused import in loader tests`
207
+ - `docs: document fitness_functions.yaml merge order`
208
+ - `ci: add release-please workflow`
209
+
210
+ Supported types include `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, and `chore`. The PR title check in CI enforces this format.
@@ -0,0 +1,26 @@
1
+ [project]
2
+ name = "tff-dbt"
3
+ version = "0.2.0"
4
+ description = "dbt adapter for Transformation Fitness Functions (tff)"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = { text = "MIT" }
8
+ authors = [
9
+ { name = "Bart Schuijt", email = "schuijt.bart@gmail.com" }
10
+ ]
11
+ dependencies = [
12
+ "tff-core",
13
+ ]
14
+
15
+ [project.scripts]
16
+ tff-dbt = "tff.dbt.cli:main"
17
+
18
+ [tool.uv.sources]
19
+ tff-core = { workspace = true }
20
+
21
+ [build-system]
22
+ requires = ["hatchling"]
23
+ build-backend = "hatchling.build"
24
+
25
+ [tool.hatch.build.targets.wheel]
26
+ packages = ["src/tff"]
@@ -0,0 +1,9 @@
1
+ """dbt adapter for Transformation Fitness Functions (tff)."""
2
+
3
+ from tff.dbt.manifest import load_dbt_models
4
+ from tff.dbt.runner import run_all_checks
5
+
6
+ __all__ = [
7
+ "load_dbt_models",
8
+ "run_all_checks",
9
+ ]
@@ -0,0 +1,96 @@
1
+ """Command-line interface for tff-dbt."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import logging
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from tff.core.config import load_fitness_config
11
+ from tff.core.context import set_ff_config
12
+ from tff.core.report import render_lint_report
13
+ from tff.dbt.runner import run_all_checks
14
+
15
+
16
def _parse_checks(value: str | None) -> list[str] | None:
    """Split a comma-separated ``--checks`` value into a list of check names.

    Args:
        value: Raw option value, or ``None`` when the option was not given.

    Returns:
        The whitespace-stripped, non-empty names in their original order, or
        ``None`` when no names were supplied (missing, empty, or made up only
        of commas and whitespace).
    """
    if not value:
        return None
    names = [part.strip() for part in value.split(",") if part.strip()]
    # A value such as "," or " " contains no names. Returning [] would be
    # treated as an explicit selection of zero checks, unlike "" which means
    # "not specified"; normalise both cases to None.
    return names or None
20
+
21
+
22
def main(argv: list[str] | None = None) -> int:
    """Run the ``tff-dbt`` command-line interface.

    Args:
        argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when the lint report passes, ``1`` when it does not, and ``2``
        when a required file (such as the dbt manifest) cannot be found.
    """
    parser = argparse.ArgumentParser(
        prog="tff-dbt",
        description="Run dbt Transformation Fitness Function (tff) checks",
    )
    subparsers = parser.add_subparsers(dest="command", required=True)

    lint_parser = subparsers.add_parser("lint", help="Run all enabled fitness checks")
    lint_parser.add_argument(
        "--project",
        type=Path,
        default=Path.cwd(),
        help="dbt project root (default: current directory)",
    )
    lint_parser.add_argument(
        "--config",
        default="fitness_functions.yaml",
        help="Path to fitness_functions.yaml (relative to project root)",
    )
    lint_parser.add_argument(
        "--checks",
        default=None,
        help="Comma-separated checks to run (default: all enabled). "
        "Use 'rules' for general linter rules only.",
    )
    lint_parser.add_argument(
        "--fail-level",
        choices=["error", "warning"],
        default="error",
        help="Exit non-zero when findings at or above this severity exist",
    )
    lint_parser.add_argument(
        "--group-by",
        choices=["connascence", "model"],
        default="connascence",
        help="How to group violations in the report (default: connascence)",
    )
    lint_parser.add_argument(
        "--dialect",
        default="bigquery",
        help="SQL dialect of the dbt models (default: bigquery)",
    )

    args = parser.parse_args(argv)

    if args.command == "lint":
        logging.basicConfig(level=logging.ERROR)
        project_root = args.project.resolve()
        config = load_fitness_config(
            project_root,
            config_path=args.config,
        )
        set_ff_config(config)
        checks = _parse_checks(args.checks)

        try:
            findings, models_checked, executed_checks = run_all_checks(
                project_root=project_root,
                config=config,
                checks=checks,
                dialect=args.dialect,
            )
        except FileNotFoundError as exc:
            # A missing manifest.json is reported with an actionable hint
            # ("run 'dbt compile' first"); show that message instead of a
            # traceback and use a distinct exit code so callers can tell
            # "could not run" apart from "lint failed".
            print(f"error: {exc}", file=sys.stderr)
            return 2

        passed = render_lint_report(
            findings,
            models_checked=models_checked,
            executed_checks=executed_checks,
            fail_level=args.fail_level,  # type: ignore[arg-type]
            group_by=args.group_by,  # type: ignore[arg-type]
        )
        return 0 if passed else 1

    return 1
93
+
94
+
95
+ if __name__ == "__main__":
96
+ sys.exit(main())
@@ -0,0 +1,116 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from tff.core.model import ModelRepresentation
7
+
8
+
9
def load_dbt_models(
    project_root: Path,
    target_dir: str = "target",
    dialect: str = "bigquery",
) -> dict[str, ModelRepresentation]:
    """Build ``ModelRepresentation`` objects from a compiled dbt manifest.

    Reads ``<project_root>/<target_dir>/manifest.json`` and maps every model,
    seed and source entry to a ``ModelRepresentation`` keyed by its dbt
    unique ID.

    Args:
        project_root: Root directory of the dbt project.
        target_dir: Directory (relative to ``project_root``) holding the manifest.
        dialect: SQL dialect recorded on every returned representation.

    Returns:
        Mapping of dbt unique ID to ``ModelRepresentation``.

    Raises:
        FileNotFoundError: If the manifest file does not exist.
    """
    manifest_path = project_root / target_dir / "manifest.json"
    if not manifest_path.exists():
        raise FileNotFoundError(
            f"dbt manifest not found at {manifest_path}. Please run 'dbt compile' first."
        )

    with open(manifest_path, encoding="utf-8") as f:
        manifest = json.load(f)

    # ``or {}`` also covers keys that are present but explicitly null.
    nodes = manifest.get("nodes") or {}
    testable_prefixes = ("model.", "seed.")
    graph_prefixes = ("model.", "seed.", "source.")

    # 1. Collect tests by the unique ID of each model/seed they depend on
    model_tests: dict[str, list[tuple[str, dict]]] = {}
    for node in nodes.values():
        if node.get("resource_type") != "test":
            continue
        test_metadata = node.get("test_metadata") or {}
        test_name = test_metadata.get("name")
        if not test_name:
            # Tests without a metadata name are not recorded as audits.
            continue
        for dep in (node.get("depends_on") or {}).get("nodes") or []:
            if dep.startswith(testable_prefixes):
                model_tests.setdefault(dep, []).append(
                    (test_name, test_metadata.get("kwargs", {}))
                )

    # 2. Map nodes of type 'model' and 'seed' to ModelRepresentation
    mapped_models: dict[str, ModelRepresentation] = {}
    for unique_id, node in nodes.items():
        if node.get("resource_type") not in ("model", "seed"):
            continue

        # Column names and types are normalised to lower case; a missing
        # data type is recorded as "unknown".
        columns_to_types = {
            col_name.lower(): (col_meta.get("data_type") or "unknown").lower()
            for col_name, col_meta in (node.get("columns") or {}).items()
        }

        # Metadata parsing: node-level ``meta`` wins, ``config.meta`` is the
        # fallback for both owner and grain so the two stay consistent.
        node_config = node.get("config") or {}
        meta = node.get("meta") or {}
        config_meta = node_config.get("meta") or {}
        owner = meta.get("owner") or config_meta.get("owner")

        grains_raw = (
            meta.get("grain")
            or meta.get("grains")
            or config_meta.get("grain")
            or config_meta.get("grains")
            or []
        )
        if isinstance(grains_raw, str):
            grains = [grains_raw]
        elif isinstance(grains_raw, list):
            grains = [str(g) for g in grains_raw]
        else:
            # Any other type (e.g. a number) is treated as "no grain declared".
            grains = []

        # Dependencies: keep only model, seed and source references
        depends_on = {
            dep
            for dep in (node.get("depends_on") or {}).get("nodes") or []
            if dep.startswith(graph_prefixes)
        }

        # Ephemeral models behave like symbolic models
        is_symbolic = node_config.get("materialized") == "ephemeral"

        rel_path = node.get("original_file_path", "")
        abs_path = str(project_root / rel_path)

        mapped_models[unique_id] = ModelRepresentation(
            name=node.get("name", ""),
            path=abs_path,
            dialect=dialect,
            is_symbolic=is_symbolic,
            is_external=False,
            columns_to_types=columns_to_types,
            depends_on=depends_on,
            description=node.get("description"),
            owner=owner,
            grains=grains,
            audits=model_tests.get(unique_id, []),
        )

    # 3. Map sources to ModelRepresentation so graph checks resolve them
    for source_id, source in (manifest.get("sources") or {}).items():
        rel_path = source.get("original_file_path", "")
        abs_path = str(project_root / rel_path)

        mapped_models[source_id] = ModelRepresentation(
            name=source.get("name", ""),
            path=abs_path,
            dialect=dialect,
            is_symbolic=True,
            is_external=True,
            columns_to_types={},
            depends_on=set(),
            description=source.get("description"),
            owner=(source.get("meta") or {}).get("owner"),
            grains=[],
            audits=[],
        )

    return mapped_models
@@ -0,0 +1,103 @@
1
+ """Orchestrator runner executing tff-core rules and checks against dbt manifest models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ from tff.core.checks.custom_exclusions import collect_custom_exclusion_findings
9
+ from tff.core.checks.dependency_graph import collect_dependency_graph_findings
10
+ from tff.core.checks.layer_integrity import collect_layer_integrity_findings
11
+ from tff.core.checks.schema_contracts import collect_schema_contract_findings
12
+ from tff.core.config import FitnessFunctionsConfig, load_fitness_config
13
+ from tff.core.context import set_ff_config
14
+ from tff.core.report import LintFinding
15
+ from tff.core.rules import ALL_RULES
16
+ from tff.core.utils.paths import model_path_relative
17
+ from tff.core.model import ModelRepresentation
18
+ from tff.dbt.manifest import load_dbt_models
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
# Registry of architectural checks: check name -> callable invoked as
# collector(models, config). Iteration follows the insertion order below.
# schema_contracts ignores the models argument and only needs the config.
CHECK_COLLECTORS = dict(
    layer_integrity=lambda models, cfg: collect_layer_integrity_findings(models, cfg),
    custom_exclusions=lambda models, cfg: collect_custom_exclusion_findings(models, cfg),
    schema_contracts=lambda _models, cfg: collect_schema_contract_findings(cfg),
    dependency_graph=lambda models, cfg: collect_dependency_graph_findings(models, cfg),
)
28
+
29
+
30
def collect_dbt_rules_findings(models: dict[str, ModelRepresentation]) -> list[LintFinding]:
    """Run every rule in ``ALL_RULES`` against each concrete model.

    External and symbolic models are skipped. Each violation message becomes
    one ``LintFinding`` with severity ``"error"``.

    Args:
        models: Model representations keyed by identifier.

    Returns:
        The findings produced by all rules, in model-then-rule order.
    """
    active_rules = [rule_cls() for rule_cls in ALL_RULES]
    collected = []

    for model in models.values():
        if model.is_external or model.is_symbolic:
            continue

        for rule in active_rules:
            violation = rule.check_model(model)
            if not violation:
                continue

            # A violation carries either a single message or a list of them.
            messages = violation.violation_msg
            if isinstance(messages, str):
                messages = [messages]

            # Drop the "<model name>: " prefix when the rule prepended it.
            name_prefix = f"{model.name}: "
            collected.extend(
                LintFinding(
                    check=rule.name,
                    severity="error",
                    model=model.name,
                    path=model_path_relative(model),
                    message=text.removeprefix(name_prefix),
                )
                for text in messages
            )
    return collected
59
+
60
+
61
def _check_enabled(config: FitnessFunctionsConfig, check_name: str) -> bool:
    """Tell whether ``config.checks.<check_name>.enabled`` is truthy.

    A check that is absent from ``config.checks``, or that has no ``enabled``
    attribute, counts as disabled.
    """
    check_settings = getattr(config.checks, check_name, None)
    enabled_flag = getattr(check_settings, "enabled", False)
    return True if enabled_flag else False
64
+
65
+
66
def run_all_checks(
    project_root: Path | None = None,
    config: FitnessFunctionsConfig | None = None,
    checks: list[str] | None = None,
    dialect: str = "bigquery",
) -> tuple[list[LintFinding], int, list[str]]:
    """Load the dbt manifest and run the selected fitness checks against it.

    Args:
        project_root: dbt project root; defaults to the current directory.
        config: Fitness configuration; loaded from ``project_root`` when ``None``.
        checks: Names of checks to run. ``None`` runs ``"rules"`` plus every
            check in ``CHECK_COLLECTORS`` that is enabled in ``config``.
            Names that are neither ``"rules"`` nor a key of
            ``CHECK_COLLECTORS`` are logged and ignored.
        dialect: SQL dialect passed through to the manifest loader.

    Returns:
        A tuple ``(findings, models_checked, executed_checks)`` where
        ``models_checked`` counts models that are neither external nor
        symbolic and ``executed_checks`` lists the check names that were run.
    """
    project_root = project_root or Path.cwd()
    if config is None:
        config = load_fitness_config(project_root)
    set_ff_config(config)

    # Parse and load manifest.json
    models = load_dbt_models(project_root, dialect=dialect)

    if checks is None:
        selected = ["rules"] + [
            name
            for name in CHECK_COLLECTORS
            if _check_enabled(config, name)
        ]
    else:
        available = ["rules", *CHECK_COLLECTORS]
        unknown = [name for name in checks if name not in available]
        if unknown:
            # A misspelled check name used to be skipped silently while still
            # being reported as executed; surface it and drop it from the
            # executed list so the report reflects what really ran.
            logger.error(
                "Ignoring unknown check name(s): %s. Available checks: %s",
                ", ".join(unknown),
                ", ".join(available),
            )
        selected = [name for name in checks if name in available]

    findings: list[LintFinding] = []

    if "rules" in selected:
        findings.extend(collect_dbt_rules_findings(models))

    for check_name, collector in CHECK_COLLECTORS.items():
        if check_name not in selected:
            continue
        findings.extend(collector(models, config))

    models_checked = sum(
        1 for m in models.values() if not m.is_external and not m.is_symbolic
    )

    return findings, models_checked, selected
@@ -0,0 +1,176 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from tff.dbt.manifest import load_dbt_models
5
+ from tff.dbt.runner import run_all_checks
6
+ from tff.core.config import FitnessFunctionsConfig
7
+
8
+
9
def test_load_dbt_models(tmp_path: Path):
    """load_dbt_models maps models, tests and sources from a minimal manifest."""
    stg_users_id = "model.my_project.stg_users"
    invalid_grain_id = "model.my_project.invalid_grain"
    raw_users_id = "source.my_project.raw_users"

    stg_users_node = {
        "resource_type": "model",
        "name": "stg_users",
        "original_file_path": "models/staging/stg_users.sql",
        "columns": {
            "id": {"data_type": "INT"},
            "name": {"data_type": "VARCHAR"},
        },
        "config": {"materialized": "view"},
        # grain is given as a plain string here
        "meta": {"owner": "data-team", "grain": "user_id"},
        "description": "Staging table for users",
        "depends_on": {"nodes": [raw_users_id]},
    }
    invalid_grain_node = {
        "resource_type": "model",
        "name": "invalid_grain",
        "original_file_path": "models/staging/invalid_grain.sql",
        "columns": {},
        # grain is neither a list nor a string
        "meta": {"grain": 123},
        "depends_on": {"nodes": []},
    }
    not_null_test_node = {
        "resource_type": "test",
        "name": "not_null_stg_users_id",
        "test_metadata": {"name": "not_null", "kwargs": {"column_name": "id"}},
        "depends_on": {"nodes": [stg_users_id]},
    }
    unnamed_test_node = {
        "resource_type": "test",
        "name": "no_name_test",
        # test metadata without a name
        "test_metadata": {},
        "depends_on": {"nodes": [stg_users_id]},
    }
    raw_users_source = {
        "resource_type": "source",
        "name": "raw_users",
        "original_file_path": "models/sources/raw_users.yml",
        "description": "Raw users source table",
        "meta": {"owner": "ingest-team"},
    }
    manifest = {
        "nodes": {
            stg_users_id: stg_users_node,
            invalid_grain_id: invalid_grain_node,
            "test.my_project.not_null_stg_users_id": not_null_test_node,
            "test.my_project.no_name_test": unnamed_test_node,
        },
        "sources": {raw_users_id: raw_users_source},
    }

    manifest_dir = tmp_path / "target"
    manifest_dir.mkdir(parents=True, exist_ok=True)
    (manifest_dir / "manifest.json").write_text(json.dumps(manifest), encoding="utf-8")

    loaded = load_dbt_models(tmp_path)
    assert stg_users_id in loaded
    assert raw_users_id in loaded

    stg_users = loaded[stg_users_id]
    assert stg_users.name == "stg_users"
    assert stg_users.columns_to_types == {"id": "int", "name": "varchar"}
    assert stg_users.owner == "data-team"
    assert stg_users.description == "Staging table for users"
    assert stg_users.depends_on == {raw_users_id}
    assert stg_users.audits == [("not_null", {"column_name": "id"})]
    assert stg_users.grains == ["user_id"]

    assert loaded[invalid_grain_id].grains == []

    raw_users = loaded[raw_users_id]
    assert raw_users.name == "raw_users"
    assert raw_users.is_external is True
    assert raw_users.owner == "ingest-team"
99
+
100
+
101
def test_load_dbt_models_missing_manifest(tmp_path: Path):
    """load_dbt_models raises FileNotFoundError when no manifest.json exists."""
    import pytest

    # tmp_path is a fresh, empty directory, so target/manifest.json cannot
    # exist; this avoids depending on a hard-coded absolute path being absent.
    with pytest.raises(FileNotFoundError):
        load_dbt_models(tmp_path)
105
+
106
+
107
def test_run_all_checks(tmp_path: Path):
    """run_all_checks works with an explicit config, an auto-loaded config and a check subset."""
    # Manifest with one standard model and one ephemeral (symbolic) model
    manifest = {
        "nodes": {
            "model.my_project.stg_users": {
                "resource_type": "model",
                "name": "stg_users",
                "original_file_path": "models/staging/stg_users.sql",
                "columns": {"id": {"data_type": "INT"}},
                "config": {},
                "meta": {"owner": "data-team"},
                "depends_on": {"nodes": []},
            },
            "model.my_project.symbolic_model": {
                "resource_type": "model",
                "name": "symbolic_model",
                "original_file_path": "models/staging/symbolic.sql",
                "columns": {},
                "config": {"materialized": "ephemeral"},
                "meta": {},
                "depends_on": {"nodes": []},
            },
        },
        "sources": {},
    }
    manifest_dir = tmp_path / "target"
    manifest_dir.mkdir(parents=True, exist_ok=True)
    (manifest_dir / "manifest.json").write_text(json.dumps(manifest), encoding="utf-8")

    # SQL file backing the standard model
    model_sql = tmp_path / "models/staging/stg_users.sql"
    model_sql.parent.mkdir(parents=True, exist_ok=True)
    model_sql.write_text("SELECT id FROM raw", encoding="utf-8")

    # 1. Config passed explicitly
    explicit_config = FitnessFunctionsConfig()
    explicit_config.rules.metadata.enabled = True
    explicit_config.rules.metadata.owner = True
    explicit_config.rules.metadata.description = True  # stg_users has no description

    explicit_findings, checked_count, _ = run_all_checks(
        project_root=tmp_path,
        config=explicit_config,
    )
    assert checked_count == 1  # the ephemeral model is not counted
    assert len(explicit_findings) > 0
    assert any("description" in finding.check for finding in explicit_findings)

    # 2. config=None: the config file is loaded from the project root
    # NOTE(review): confirm the nested-key indentation of this YAML literal
    # against the original file; it is reproduced here exactly as shown.
    config_file = tmp_path / "fitness_functions.yaml"
    config_file.write_text("rules:\n metadata:\n enabled: true\n description: true\n", encoding="utf-8")
    auto_findings, _, _ = run_all_checks(
        project_root=tmp_path,
        config=None,
    )
    assert len(auto_findings) > 0

    # 3. Explicit list of checks
    subset_findings, _, subset_selected = run_all_checks(
        project_root=tmp_path,
        config=explicit_config,
        checks=["rules"],
    )
    assert subset_selected == ["rules"]
    assert len(subset_findings) > 0
176
+