vgi-lint-check 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. vgi_lint_check-0.1.0/LICENSE +134 -0
  2. vgi_lint_check-0.1.0/PKG-INFO +320 -0
  3. vgi_lint_check-0.1.0/README.md +153 -0
  4. vgi_lint_check-0.1.0/pyproject.toml +70 -0
  5. vgi_lint_check-0.1.0/src/vgi_lint_check/__init__.py +27 -0
  6. vgi_lint_check-0.1.0/src/vgi_lint_check/__main__.py +4 -0
  7. vgi_lint_check-0.1.0/src/vgi_lint_check/baseline.py +76 -0
  8. vgi_lint_check-0.1.0/src/vgi_lint_check/cli.py +267 -0
  9. vgi_lint_check-0.1.0/src/vgi_lint_check/comparison.py +90 -0
  10. vgi_lint_check-0.1.0/src/vgi_lint_check/config.py +166 -0
  11. vgi_lint_check-0.1.0/src/vgi_lint_check/connection.py +145 -0
  12. vgi_lint_check-0.1.0/src/vgi_lint_check/core.py +103 -0
  13. vgi_lint_check-0.1.0/src/vgi_lint_check/diff.py +68 -0
  14. vgi_lint_check-0.1.0/src/vgi_lint_check/exit_codes.py +18 -0
  15. vgi_lint_check-0.1.0/src/vgi_lint_check/findings.py +72 -0
  16. vgi_lint_check-0.1.0/src/vgi_lint_check/loader.py +209 -0
  17. vgi_lint_check-0.1.0/src/vgi_lint_check/model.py +254 -0
  18. vgi_lint_check-0.1.0/src/vgi_lint_check/py.typed +0 -0
  19. vgi_lint_check-0.1.0/src/vgi_lint_check/reporting/__init__.py +32 -0
  20. vgi_lint_check-0.1.0/src/vgi_lint_check/reporting/agent_reporter.py +69 -0
  21. vgi_lint_check-0.1.0/src/vgi_lint_check/reporting/json_reporter.py +112 -0
  22. vgi_lint_check-0.1.0/src/vgi_lint_check/reporting/terminal.py +136 -0
  23. vgi_lint_check-0.1.0/src/vgi_lint_check/result.py +66 -0
  24. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/__init__.py +32 -0
  25. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/_util.py +20 -0
  26. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/base.py +46 -0
  27. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/columns.py +74 -0
  28. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/descriptions.py +132 -0
  29. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/engine.py +22 -0
  30. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/examples.py +130 -0
  31. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/execution.py +92 -0
  32. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/functions.py +72 -0
  33. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/pragmas.py +50 -0
  34. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/registry.py +44 -0
  35. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/settings.py +57 -0
  36. vgi_lint_check-0.1.0/src/vgi_lint_check/rules/tags.py +88 -0
  37. vgi_lint_check-0.1.0/src/vgi_lint_check/scoring.py +108 -0
  38. vgi_lint_check-0.1.0/src/vgi_lint_check/snapshot.py +42 -0
  39. vgi_lint_check-0.1.0/src/vgi_lint_check/tags.py +74 -0
  40. vgi_lint_check-0.1.0/src/vgi_lint_check/versions.py +113 -0
@@ -0,0 +1,134 @@
1
+ Query Farm Source-Available License, Version 1.0
2
+
3
+ Copyright (c) 2025, 2026 Query Farm LLC. All rights reserved.
4
+
5
+ ## 1. Definitions
6
+
7
+ "Licensor" means Query Farm LLC (http://query.farm, hello@query.farm) and its
8
+ affiliates under common control.
9
+
10
+ "VGI" means the Vector Gateway Interface, the DuckDB extension technology developed
11
+ by the Licensor, also referred to by the Licensor as its "Hyperfederation" database
12
+ technology.
13
+
14
+ "Licensed Work" means VGI, including its source code, object code, and any
15
+ documentation distributed with it, in each version made available by the Licensor
16
+ under this License.
17
+
18
+ "You" (or "Your") means the individual or legal entity exercising rights under this
19
+ License, together with all affiliates under common control with that entity.
20
+
21
+ "Production Use" means any use of the Licensed Work other than for development,
22
+ testing, evaluation, experimentation, or other non-production purposes.
23
+
24
+ "Hyperfederation Services" means services relating to the federation, gateway,
25
+ integration, querying, or interoperation of data sources using VGI or
26
+ functionally equivalent technology, including services that expose, broker, or
27
+ provide access to such federated or gateway capabilities.
28
+
29
+ "Commercial Marketplace" means any platform, exchange, or intermediary service,
30
+ whether or not operated for a fee, that connects providers and consumers of
31
+ Hyperfederation Services, or that facilitates the offering, discovery, exchange,
32
+ sale, or licensing of Hyperfederation Services among third parties.
33
+
34
+ "Competing Offering" means a product or service that You make available to third
35
+ parties, on a paid basis (including through paid support, subscription, or hosting
36
+ arrangements), whose capabilities significantly overlap with those of the Licensor's
37
+ version(s) of the Licensed Work.
38
+
39
+ ## 2. Grant of Rights
40
+
41
+ Subject to the terms and limitations of this License, the Licensor grants You a
42
+ worldwide, royalty-free, non-exclusive license to:
43
+
44
+ (a) use, copy, and run the Licensed Work for any non-production purpose;
45
+
46
+ (b) modify the Licensed Work and create derivative works of it;
47
+
48
+ (c) redistribute the Licensed Work and Your derivative works, provided You comply
49
+ with Section 5; and
50
+
51
+ (d) make Production Use of the Licensed Work, except where such use is restricted by
52
+ Section 3 or reserved to the Licensor by Section 4.
53
+
54
+ ## 3. Production Use Conditions
55
+
56
+ The grant of Production Use in Section 2(d) does not extend to, and You may not
57
+ without a separate commercial license from the Licensor:
58
+
59
+ (a) provide a Competing Offering to third parties; or
60
+
61
+ (b) offer the Licensed Work, or any derivative work of it, to third parties on a
62
+ hosted, embedded, or as-a-service basis where doing so competes with the Licensor's
63
+ commercial interests in the Licensed Work.
64
+
65
+ "Embedded" includes incorporating the source or object code of the Licensed Work
66
+ into a Competing Offering, and packaging a Competing Offering such that the Licensed
67
+ Work must be accessed or downloaded for that offering to function.
68
+
69
+ Hosting or using the Licensed Work for Your own internal purposes is not a Competing
70
+ Offering and is permitted, including across Your affiliates under common control.
71
+
72
+ ## 4. Reserved Rights
73
+
74
+ Notwithstanding any other provision of this License, the Licensor reserves to itself
75
+ the exclusive right to build, operate, offer, or authorize a Commercial Marketplace
76
+ that incorporates, integrates, is built upon, or otherwise uses the Licensed Work.
77
+
78
+ This License grants You no right to construct, operate, or enable a Commercial
79
+ Marketplace using the Licensed Work, whether on a commercial or non-commercial basis,
80
+ and any such use requires a separate written agreement with the Licensor.
81
+
82
+ ## 5. Redistribution
83
+
84
+ If You redistribute the Licensed Work or any derivative work of it, in original or
85
+ modified form, You must:
86
+
87
+ (a) include a complete, unmodified copy of this License with each copy; and
88
+
89
+ (b) cause any recipient to receive the Licensed Work subject to the terms of this
90
+ License.
91
+
92
+ The conditions in Sections 3 and 4 apply to every recipient of the Licensed Work,
93
+ whether received directly from the Licensor or through a third party.
94
+
95
+ ## 6. Conversion to Open Source
96
+
97
+ For each version of the Licensed Work, on the tenth anniversary of the date the
98
+ Licensor first made that version publicly available (the "Change Date" for that
99
+ version), the Licensor additionally grants You the right to use that version under
100
+ the terms of the Apache License, Version 2.0, and on and after that version's Change
101
+ Date the restrictions in Sections 3 and 4 no longer apply to that version.
102
+
103
+ This License applies separately to each version of the Licensed Work, and the Change
104
+ Date may differ between versions.
105
+
106
+ ## 7. Commercial Licensing
107
+
108
+ If Your intended use is not permitted under this License, You may obtain a separate
109
+ commercial license from the Licensor by contacting hello@query.farm. Absent such a
110
+ license, You must refrain from the restricted use.
111
+
112
+ ## 8. Trademarks
113
+
114
+ This License does not grant You any right to use the names, trademarks, service
115
+ marks, or logos of the Licensor, including "Vector Gateway Interface," "VGI," and
116
+ "Hyperfederation," except as required for reasonable and customary use in describing
117
+ the origin of the Licensed Work.
118
+
119
+ ## 9. Termination
120
+
121
+ Any use of the Licensed Work in violation of this License automatically terminates
122
+ Your rights under this License for the current and all other versions of the Licensed
123
+ Work. Your rights may be reinstated only by a writing signed by the Licensor.
124
+
125
+ ## 10. Disclaimer of Warranty and Limitation of Liability
126
+
127
+ TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
128
+ AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EXPRESS OR IMPLIED,
129
+ INCLUDING WITHOUT LIMITATION ANY WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS
130
+ FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, OR TITLE.
131
+
132
+ TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL THE LICENSOR BE
133
+ LIABLE TO YOU FOR ANY DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR THE USE
134
+ OF THE LICENSED WORK, WHETHER IN CONTRACT, TORT, OR OTHERWISE.
@@ -0,0 +1,320 @@
1
+ Metadata-Version: 2.3
2
+ Name: vgi-lint-check
3
+ Version: 0.1.0
4
+ Summary: A pydoclint-style metadata-quality linter for VGI workers.
5
+ Keywords: vgi,duckdb,haybarn,linter,metadata,data-quality,documentation,catalog,data-engineering,cli
6
+ Author: Rusty Conover
7
+ Author-email: Rusty Conover <rusty@query.farm>
8
+ License: Query Farm Source-Available License, Version 1.0
9
+
10
+ Copyright (c) 2025, 2026 Query Farm LLC. All rights reserved.
11
+
12
+ ## 1. Definitions
13
+
14
+ "Licensor" means Query Farm LLC (http://query.farm, hello@query.farm) and its
15
+ affiliates under common control.
16
+
17
+ "VGI" means the Vector Gateway Interface, the DuckDB extension technology developed
18
+ by the Licensor, also referred to by the Licensor as its "Hyperfederation" database
19
+ technology.
20
+
21
+ "Licensed Work" means VGI, including its source code, object code, and any
22
+ documentation distributed with it, in each version made available by the Licensor
23
+ under this License.
24
+
25
+ "You" (or "Your") means the individual or legal entity exercising rights under this
26
+ License, together with all affiliates under common control with that entity.
27
+
28
+ "Production Use" means any use of the Licensed Work other than for development,
29
+ testing, evaluation, experimentation, or other non-production purposes.
30
+
31
+ "Hyperfederation Services" means services relating to the federation, gateway,
32
+ integration, querying, or interoperation of data sources using VGI or
33
+ functionally equivalent technology, including services that expose, broker, or
34
+ provide access to such federated or gateway capabilities.
35
+
36
+ "Commercial Marketplace" means any platform, exchange, or intermediary service,
37
+ whether or not operated for a fee, that connects providers and consumers of
38
+ Hyperfederation Services, or that facilitates the offering, discovery, exchange,
39
+ sale, or licensing of Hyperfederation Services among third parties.
40
+
41
+ "Competing Offering" means a product or service that You make available to third
42
+ parties, on a paid basis (including through paid support, subscription, or hosting
43
+ arrangements), whose capabilities significantly overlap with those of the Licensor's
44
+ version(s) of the Licensed Work.
45
+
46
+ ## 2. Grant of Rights
47
+
48
+ Subject to the terms and limitations of this License, the Licensor grants You a
49
+ worldwide, royalty-free, non-exclusive license to:
50
+
51
+ (a) use, copy, and run the Licensed Work for any non-production purpose;
52
+
53
+ (b) modify the Licensed Work and create derivative works of it;
54
+
55
+ (c) redistribute the Licensed Work and Your derivative works, provided You comply
56
+ with Section 5; and
57
+
58
+ (d) make Production Use of the Licensed Work, except where such use is restricted by
59
+ Section 3 or reserved to the Licensor by Section 4.
60
+
61
+ ## 3. Production Use Conditions
62
+
63
+ The grant of Production Use in Section 2(d) does not extend to, and You may not
64
+ without a separate commercial license from the Licensor:
65
+
66
+ (a) provide a Competing Offering to third parties; or
67
+
68
+ (b) offer the Licensed Work, or any derivative work of it, to third parties on a
69
+ hosted, embedded, or as-a-service basis where doing so competes with the Licensor's
70
+ commercial interests in the Licensed Work.
71
+
72
+ "Embedded" includes incorporating the source or object code of the Licensed Work
73
+ into a Competing Offering, and packaging a Competing Offering such that the Licensed
74
+ Work must be accessed or downloaded for that offering to function.
75
+
76
+ Hosting or using the Licensed Work for Your own internal purposes is not a Competing
77
+ Offering and is permitted, including across Your affiliates under common control.
78
+
79
+ ## 4. Reserved Rights
80
+
81
+ Notwithstanding any other provision of this License, the Licensor reserves to itself
82
+ the exclusive right to build, operate, offer, or authorize a Commercial Marketplace
83
+ that incorporates, integrates, is built upon, or otherwise uses the Licensed Work.
84
+
85
+ This License grants You no right to construct, operate, or enable a Commercial
86
+ Marketplace using the Licensed Work, whether on a commercial or non-commercial basis,
87
+ and any such use requires a separate written agreement with the Licensor.
88
+
89
+ ## 5. Redistribution
90
+
91
+ If You redistribute the Licensed Work or any derivative work of it, in original or
92
+ modified form, You must:
93
+
94
+ (a) include a complete, unmodified copy of this License with each copy; and
95
+
96
+ (b) cause any recipient to receive the Licensed Work subject to the terms of this
97
+ License.
98
+
99
+ The conditions in Sections 3 and 4 apply to every recipient of the Licensed Work,
100
+ whether received directly from the Licensor or through a third party.
101
+
102
+ ## 6. Conversion to Open Source
103
+
104
+ For each version of the Licensed Work, on the tenth anniversary of the date the
105
+ Licensor first made that version publicly available (the "Change Date" for that
106
+ version), the Licensor additionally grants You the right to use that version under
107
+ the terms of the Apache License, Version 2.0, and on and after that version's Change
108
+ Date the restrictions in Sections 3 and 4 no longer apply to that version.
109
+
110
+ This License applies separately to each version of the Licensed Work, and the Change
111
+ Date may differ between versions.
112
+
113
+ ## 7. Commercial Licensing
114
+
115
+ If Your intended use is not permitted under this License, You may obtain a separate
116
+ commercial license from the Licensor by contacting hello@query.farm. Absent such a
117
+ license, You must refrain from the restricted use.
118
+
119
+ ## 8. Trademarks
120
+
121
+ This License does not grant You any right to use the names, trademarks, service
122
+ marks, or logos of the Licensor, including "Vector Gateway Interface," "VGI," and
123
+ "Hyperfederation," except as required for reasonable and customary use in describing
124
+ the origin of the Licensed Work.
125
+
126
+ ## 9. Termination
127
+
128
+ Any use of the Licensed Work in violation of this License automatically terminates
129
+ Your rights under this License for the current and all other versions of the Licensed
130
+ Work. Your rights may be reinstated only by a writing signed by the Licensor.
131
+
132
+ ## 10. Disclaimer of Warranty and Limitation of Liability
133
+
134
+ TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
135
+ AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EXPRESS OR IMPLIED,
136
+ INCLUDING WITHOUT LIMITATION ANY WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS
137
+ FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, OR TITLE.
138
+
139
+ TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT WILL THE LICENSOR BE
140
+ LIABLE TO YOU FOR ANY DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR THE USE
141
+ OF THE LICENSED WORK, WHETHER IN CONTRACT, TORT, OR OTHERWISE.
142
+ Classifier: License :: Other/Proprietary License
143
+ Classifier: Development Status :: 4 - Beta
144
+ Classifier: Environment :: Console
145
+ Classifier: Intended Audience :: Developers
146
+ Classifier: Programming Language :: Python :: 3
147
+ Classifier: Programming Language :: Python :: 3 :: Only
148
+ Classifier: Programming Language :: Python :: 3.11
149
+ Classifier: Programming Language :: Python :: 3.12
150
+ Classifier: Programming Language :: Python :: 3.13
151
+ Classifier: Topic :: Database
152
+ Classifier: Topic :: Software Development :: Quality Assurance
153
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
154
+ Classifier: Operating System :: OS Independent
155
+ Classifier: Typing :: Typed
156
+ Requires-Dist: haybarn>=1.5.4rc1
157
+ Requires-Dist: click>=8.1
158
+ Requires-Dist: rich>=13
159
+ Requires-Dist: pytz>=2024.1
160
+ Maintainer: Query Farm LLC
161
+ Maintainer-email: Query Farm LLC <hello@query.farm>
162
+ Requires-Python: >=3.11
163
+ Project-URL: Homepage, https://query.farm
164
+ Project-URL: Repository, https://github.com/Query-farm/vgi-lint-check
165
+ Project-URL: Issues, https://github.com/Query-farm/vgi-lint-check/issues
166
+ Description-Content-Type: text/markdown
167
+
168
+ # vgi-lint
169
+
170
+ A `pydoclint`-style **metadata-quality linter for VGI workers**. It attaches to an
171
+ arbitrary VGI worker, reads everything the worker contributes through DuckDB
172
+ system tables, and reports quality findings — missing descriptions, undocumented
173
+ columns/functions, absent or malformed example queries, untagged objects, and
174
+ more — with a quality score, per-data-version baselines, and machine output for
175
+ coding agents.
176
+
177
+ It works with **any** VGI worker regardless of implementation language (Python,
178
+ Go, Rust, Java, TypeScript, …): it treats the worker as a black box and inspects
179
+ only what surfaces post-attach.
180
+
181
+ ## Install / run
182
+
183
+ ```bash
184
+ uv sync # haybarn is RC-only; pyproject.toml sets prerelease = "allow" under [tool.uv]
185
+ uv run vgi-lint --help
186
+ ```
187
+
188
+ ## Quick start
189
+
190
+ ```bash
191
+ # Lint a local subprocess worker
192
+ uv run vgi-lint 'uv run volcano_worker.py'
193
+
194
+ # Lint a no-auth HTTP worker
195
+ uv run vgi-lint http://localhost:9009
196
+
197
+ # Machine output for a coding agent / CI
198
+ uv run vgi-lint http://localhost:9009 --format agent
199
+ uv run vgi-lint http://localhost:9009 --format json
200
+ ```
201
+
202
+ In a worker's own repo, add a `[tool.vgi-lint-check]` block (see `vgi-lint init`)
203
+ with a `location`, then just run `vgi-lint` with no arguments.
204
+
205
+ > v1 supports **local subprocess** and **no-auth HTTP** workers. Authenticated
206
+ > (OAuth) workers are not yet supported.
207
+
208
+ ## What it checks
209
+
210
+ Object coverage: schemas, tables, views, columns, scalar functions, macros,
211
+ settings, and pragmas. Rule families:
212
+
213
+ | Family | Codes | Examples |
214
+ | --- | --- | --- |
215
+ | Descriptions | VGI1xx | schema/table/view comment, `vgi.description_llm`, `vgi.description_md` |
216
+ | Columns | VGI2xx | column-comment coverage, comment-not-echo |
217
+ | Functions | VGI3xx | function/macro description, parameter documentation, macro examples |
218
+ | Tags | VGI4xx | required tag keys, reserved-tag validity |
219
+ | Examples | VGI5xx | `vgi.example_queries` present, valid JSON, complete entries, **catalog-qualified** |
220
+ | Settings | VGI6xx | setting descriptions |
221
+ | Pragmas | VGI7xx | pragma descriptions |
222
+ | Execution | VGI9xx | example queries bind/execute (opt-in, `--execute`) |
223
+
224
+ Run `vgi-lint rules` to list every rule, or `vgi-lint explain VGI112` for one.
225
+
226
+ ## Data versions
227
+
228
+ A VGI worker can publish multiple data versions whose metadata differs. The tool
229
+ can lint one or all of them and compare quality across versions:
230
+
231
+ ```bash
232
+ uv run vgi-lint versions <location> # list published versions
233
+ uv run vgi-lint <location> --data-version 2.0.0
234
+ uv run vgi-lint <location> --all-data-versions # per-version report + comparison
235
+ ```
236
+
237
+ ## Baselines (grandfathering)
238
+
239
+ Adopt the linter on an existing worker without a wall of failures: record current
240
+ findings as a baseline, then fail CI only on **new** findings. Baselines are
241
+ per data version (`<prefix>.<version>.json`).
242
+
243
+ ```bash
244
+ uv run vgi-lint <location> --baseline vgi-lint-baseline --update-baseline
245
+ uv run vgi-lint <location> --baseline vgi-lint-baseline --fail-on warning
246
+ ```
247
+
248
+ ## Configuration
249
+
250
+ `[tool.vgi-lint-check]` in `pyproject.toml` (or a dedicated `vgi-lint.toml`):
251
+
252
+ ```toml
253
+ [tool.vgi-lint-check]
254
+ location = "uv run worker.py"
255
+ select = ["ALL"]
256
+ ignore = ["VGI113"]
257
+ fail_on = "error"
258
+
259
+ [tool.vgi-lint-check.severity]
260
+ VGI201 = "error"
261
+
262
+ [tool.vgi-lint-check.options]
263
+ required_schema_tags = ["provider", "domain"]
264
+ column_comment_min_ratio = 0.8
265
+
266
+ [tool.vgi-lint-check.per-object]
267
+ "volcanos.hans.*" = { ignore = ["VGI112"] }
268
+ ```
269
+
270
+ Precedence: defaults < `pyproject.toml` < `vgi-lint.toml` < CLI flags.
271
+
272
+ ## Exit codes
273
+
274
+ `0` clean (or only findings below `--fail-on`) · `1` config/tool error · `2` findings at or
275
+ above the `--fail-on` severity (only new findings count when a baseline is set) · `3` connection error.
276
+
277
+ ## Development
278
+
279
+ ```bash
280
+ uv run pytest # unit tests (offline)
281
+ uv run pytest --run-live # also run live tests against real workers
282
+ uv build # build sdist + wheel into dist/
283
+ ```
284
+
285
+ ## Releasing (GitHub Actions → PyPI)
286
+
287
+ Publishing is automated via GitHub Actions using **PyPI Trusted Publishing**
288
+ (OIDC — no API token secret to store):
289
+
290
+ - `.github/workflows/ci.yml` runs the offline test suite (Python 3.11–3.13) and
291
+ a smoke build on every push/PR.
292
+ - `.github/workflows/publish.yml` builds, validates (`twine check`), and uploads
293
+ to PyPI when a **GitHub Release is published**. It first checks that the
294
+ release tag matches the `version` in `pyproject.toml`.
295
+
296
+ One-time setup on PyPI (Trusted Publisher), under the project's *Publishing*
297
+ settings (use a "pending publisher" before the first release):
298
+
299
+ | Field | Value |
300
+ | --- | --- |
301
+ | Owner | `Query-farm` |
302
+ | Repository | `vgi-lint-check` |
303
+ | Workflow | `publish.yml` |
304
+ | Environment | `pypi` |
305
+
306
+ Also create a GitHub Environment named `pypi` in the repo settings (it gates the
307
+ publish job and is referenced for the OIDC claim).
308
+
309
+ To cut a release:
310
+
311
+ ```bash
312
+ # bump version in pyproject.toml, commit, then tag + create the release
313
+ git tag v0.1.0 && git push origin v0.1.0
314
+ gh release create v0.1.0 --generate-notes
315
+ ```
316
+
317
+ The release publishing event triggers the workflow. (Prefer a token instead of
318
+ OIDC? Replace the `publish` job's trusted-publishing step with
319
+ `pypa/gh-action-pypi-publish` configured with `password: ${{ secrets.PYPI_API_TOKEN }}`
320
+ and add that repository secret.)
@@ -0,0 +1,153 @@
1
+ # vgi-lint
2
+
3
+ A `pydoclint`-style **metadata-quality linter for VGI workers**. It attaches to an
4
+ arbitrary VGI worker, reads everything the worker contributes through DuckDB
5
+ system tables, and reports quality findings — missing descriptions, undocumented
6
+ columns/functions, absent or malformed example queries, untagged objects, and
7
+ more — with a quality score, per-data-version baselines, and machine output for
8
+ coding agents.
9
+
10
+ It works with **any** VGI worker regardless of implementation language (Python,
11
+ Go, Rust, Java, TypeScript, …): it treats the worker as a black box and inspects
12
+ only what surfaces post-attach.
13
+
14
+ ## Install / run
15
+
16
+ ```bash
17
+ uv sync # haybarn is RC-only; pyproject.toml sets prerelease = "allow" under [tool.uv]
18
+ uv run vgi-lint --help
19
+ ```
20
+
21
+ ## Quick start
22
+
23
+ ```bash
24
+ # Lint a local subprocess worker
25
+ uv run vgi-lint 'uv run volcano_worker.py'
26
+
27
+ # Lint a no-auth HTTP worker
28
+ uv run vgi-lint http://localhost:9009
29
+
30
+ # Machine output for a coding agent / CI
31
+ uv run vgi-lint http://localhost:9009 --format agent
32
+ uv run vgi-lint http://localhost:9009 --format json
33
+ ```
34
+
35
+ In a worker's own repo, add a `[tool.vgi-lint-check]` block (see `vgi-lint init`)
36
+ with a `location`, then just run `vgi-lint` with no arguments.
37
+
38
+ > v1 supports **local subprocess** and **no-auth HTTP** workers. Authenticated
39
+ > (OAuth) workers are not yet supported.
40
+
41
+ ## What it checks
42
+
43
+ Object coverage: schemas, tables, views, columns, scalar functions, macros,
44
+ settings, and pragmas. Rule families:
45
+
46
+ | Family | Codes | Examples |
47
+ | --- | --- | --- |
48
+ | Descriptions | VGI1xx | schema/table/view comment, `vgi.description_llm`, `vgi.description_md` |
49
+ | Columns | VGI2xx | column-comment coverage, comment-not-echo |
50
+ | Functions | VGI3xx | function/macro description, parameter documentation, macro examples |
51
+ | Tags | VGI4xx | required tag keys, reserved-tag validity |
52
+ | Examples | VGI5xx | `vgi.example_queries` present, valid JSON, complete entries, **catalog-qualified** |
53
+ | Settings | VGI6xx | setting descriptions |
54
+ | Pragmas | VGI7xx | pragma descriptions |
55
+ | Execution | VGI9xx | example queries bind/execute (opt-in, `--execute`) |
56
+
57
+ Run `vgi-lint rules` to list every rule, or `vgi-lint explain VGI112` for one.
58
+
59
+ ## Data versions
60
+
61
+ A VGI worker can publish multiple data versions whose metadata differs. The tool
62
+ can lint one or all of them and compare quality across versions:
63
+
64
+ ```bash
65
+ uv run vgi-lint versions <location> # list published versions
66
+ uv run vgi-lint <location> --data-version 2.0.0
67
+ uv run vgi-lint <location> --all-data-versions # per-version report + comparison
68
+ ```
69
+
70
+ ## Baselines (grandfathering)
71
+
72
+ Adopt the linter on an existing worker without a wall of failures: record current
73
+ findings as a baseline, then fail CI only on **new** findings. Baselines are
74
+ per data version (`<prefix>.<version>.json`).
75
+
76
+ ```bash
77
+ uv run vgi-lint <location> --baseline vgi-lint-baseline --update-baseline
78
+ uv run vgi-lint <location> --baseline vgi-lint-baseline --fail-on warning
79
+ ```
80
+
81
+ ## Configuration
82
+
83
+ `[tool.vgi-lint-check]` in `pyproject.toml` (or a dedicated `vgi-lint.toml`):
84
+
85
+ ```toml
86
+ [tool.vgi-lint-check]
87
+ location = "uv run worker.py"
88
+ select = ["ALL"]
89
+ ignore = ["VGI113"]
90
+ fail_on = "error"
91
+
92
+ [tool.vgi-lint-check.severity]
93
+ VGI201 = "error"
94
+
95
+ [tool.vgi-lint-check.options]
96
+ required_schema_tags = ["provider", "domain"]
97
+ column_comment_min_ratio = 0.8
98
+
99
+ [tool.vgi-lint-check.per-object]
100
+ "volcanos.hans.*" = { ignore = ["VGI112"] }
101
+ ```
102
+
103
+ Precedence: defaults < `pyproject.toml` < `vgi-lint.toml` < CLI flags.
104
+
105
+ ## Exit codes
106
+
107
+ `0` clean (or only findings below `--fail-on`) · `1` config/tool error · `2` findings at or
108
+ above the `--fail-on` severity (only new findings count when a baseline is set) · `3` connection error.
109
+
110
+ ## Development
111
+
112
+ ```bash
113
+ uv run pytest # unit tests (offline)
114
+ uv run pytest --run-live # also run live tests against real workers
115
+ uv build # build sdist + wheel into dist/
116
+ ```
117
+
118
+ ## Releasing (GitHub Actions → PyPI)
119
+
120
+ Publishing is automated via GitHub Actions using **PyPI Trusted Publishing**
121
+ (OIDC — no API token secret to store):
122
+
123
+ - `.github/workflows/ci.yml` runs the offline test suite (Python 3.11–3.13) and
124
+ a smoke build on every push/PR.
125
+ - `.github/workflows/publish.yml` builds, validates (`twine check`), and uploads
126
+ to PyPI when a **GitHub Release is published**. It first checks that the
127
+ release tag matches the `version` in `pyproject.toml`.
128
+
129
+ One-time setup on PyPI (Trusted Publisher), under the project's *Publishing*
130
+ settings (use a "pending publisher" before the first release):
131
+
132
+ | Field | Value |
133
+ | --- | --- |
134
+ | Owner | `Query-farm` |
135
+ | Repository | `vgi-lint-check` |
136
+ | Workflow | `publish.yml` |
137
+ | Environment | `pypi` |
138
+
139
+ Also create a GitHub Environment named `pypi` in the repo settings (it gates the
140
+ publish job and is referenced for the OIDC claim).
141
+
142
+ To cut a release:
143
+
144
+ ```bash
145
+ # bump version in pyproject.toml, commit, then tag + create the release
146
+ git tag v0.1.0 && git push origin v0.1.0
147
+ gh release create v0.1.0 --generate-notes
148
+ ```
149
+
150
+ The release publishing event triggers the workflow. (Prefer a token instead of
151
+ OIDC? Replace the `publish` job's trusted-publishing step with
152
+ `pypa/gh-action-pypi-publish` configured with `password: ${{ secrets.PYPI_API_TOKEN }}`
153
+ and add that repository secret.)
@@ -0,0 +1,70 @@
1
+ [project]
2
+ name = "vgi-lint-check"
3
+ version = "0.1.0"
4
+ description = "A pydoclint-style metadata-quality linter for VGI workers."
5
+ readme = "README.md"
6
+ keywords = [
7
+ "vgi",
8
+ "duckdb",
9
+ "haybarn",
10
+ "linter",
11
+ "metadata",
12
+ "data-quality",
13
+ "documentation",
14
+ "catalog",
15
+ "data-engineering",
16
+ "cli",
17
+ ]
18
+ license = { file = "LICENSE" }
19
+ authors = [{ name = "Rusty Conover", email = "rusty@query.farm" }]
20
+ maintainers = [{ name = "Query Farm LLC", email = "hello@query.farm" }]
21
+ classifiers = [
22
+ "License :: Other/Proprietary License",
23
+ "Development Status :: 4 - Beta",
24
+ "Environment :: Console",
25
+ "Intended Audience :: Developers",
26
+ "Programming Language :: Python :: 3",
27
+ "Programming Language :: Python :: 3 :: Only",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Programming Language :: Python :: 3.13",
31
+ "Topic :: Database",
32
+ "Topic :: Software Development :: Quality Assurance",
33
+ "Topic :: Software Development :: Libraries :: Python Modules",
34
+ "Operating System :: OS Independent",
35
+ "Typing :: Typed",
36
+ ]
37
+ requires-python = ">=3.11"
38
+ dependencies = [
39
+ # haybarn ships only release candidates; the >=...rc1 specifier lets PEP 440
40
+ # installers (pip/uv) select the prerelease for this dependency.
41
+ "haybarn>=1.5.4rc1",
42
+ "click>=8.1",
43
+ "rich>=13",
44
+ # Needed by DuckDB/haybarn to materialize timezone-aware timestamps that
45
+ # workers publish in vgi_catalogs() release metadata.
46
+ "pytz>=2024.1",
47
+ ]
48
+
49
+ [project.urls]
50
+ Homepage = "https://query.farm"
51
+ Repository = "https://github.com/Query-farm/vgi-lint-check"
52
+ Issues = "https://github.com/Query-farm/vgi-lint-check/issues"
53
+
54
+ [project.scripts]
55
+ vgi-lint = "vgi_lint_check.cli:app"
56
+
57
+ [dependency-groups]
58
+ dev = ["pytest>=8", "twine>=5"]
59
+
60
+ [tool.uv]
61
+ prerelease = "allow"
62
+
63
+ [tool.pytest.ini_options]
64
+ markers = [
65
+ "live: tests that attach to a running VGI worker (network/subprocess); skipped by default",
66
+ ]
67
+
68
+ [build-system]
69
+ requires = ["uv_build>=0.11.7,<0.12.0"]
70
+ build-backend = "uv_build"