two-brain-audit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- two_brain_audit-0.1.0/.github/workflows/ci.yml +31 -0
- two_brain_audit-0.1.0/.github/workflows/publish.yml +29 -0
- two_brain_audit-0.1.0/.gitignore +39 -0
- two_brain_audit-0.1.0/LICENSE +21 -0
- two_brain_audit-0.1.0/PKG-INFO +180 -0
- two_brain_audit-0.1.0/README.md +130 -0
- two_brain_audit-0.1.0/docs/ARCHITECTURE.md +191 -0
- two_brain_audit-0.1.0/docs/QUICKSTART.md +341 -0
- two_brain_audit-0.1.0/examples/biged/__init__.py +11 -0
- two_brain_audit-0.1.0/examples/biged/dimensions.py +141 -0
- two_brain_audit-0.1.0/integrations/__init__.py +26 -0
- two_brain_audit-0.1.0/integrations/github.py +99 -0
- two_brain_audit-0.1.0/integrations/ollama.py +46 -0
- two_brain_audit-0.1.0/integrations/pypi.py +100 -0
- two_brain_audit-0.1.0/integrations/semgrep.py +82 -0
- two_brain_audit-0.1.0/presets/__init__.py +18 -0
- two_brain_audit-0.1.0/presets/api_service.py +69 -0
- two_brain_audit-0.1.0/presets/database.py +62 -0
- two_brain_audit-0.1.0/presets/infrastructure.py +69 -0
- two_brain_audit-0.1.0/presets/ml_pipeline.py +62 -0
- two_brain_audit-0.1.0/presets/python_project.py +135 -0
- two_brain_audit-0.1.0/pyproject.toml +74 -0
- two_brain_audit-0.1.0/run_demo.py +32 -0
- two_brain_audit-0.1.0/src/two_brain_audit/__init__.py +17 -0
- two_brain_audit-0.1.0/src/two_brain_audit/__main__.py +3 -0
- two_brain_audit-0.1.0/src/two_brain_audit/app.py +115 -0
- two_brain_audit-0.1.0/src/two_brain_audit/cli.py +208 -0
- two_brain_audit-0.1.0/src/two_brain_audit/dashboard/__init__.py +126 -0
- two_brain_audit-0.1.0/src/two_brain_audit/dashboard/ui.py +829 -0
- two_brain_audit-0.1.0/src/two_brain_audit/db.py +222 -0
- two_brain_audit-0.1.0/src/two_brain_audit/engine.py +275 -0
- two_brain_audit-0.1.0/src/two_brain_audit/exporters/__init__.py +7 -0
- two_brain_audit-0.1.0/src/two_brain_audit/exporters/csv_export.py +35 -0
- two_brain_audit-0.1.0/src/two_brain_audit/exporters/json_export.py +41 -0
- two_brain_audit-0.1.0/src/two_brain_audit/exporters/markdown_export.py +54 -0
- two_brain_audit-0.1.0/src/two_brain_audit/feedback.py +45 -0
- two_brain_audit-0.1.0/src/two_brain_audit/grades.py +47 -0
- two_brain_audit-0.1.0/src/two_brain_audit/py.typed +0 -0
- two_brain_audit-0.1.0/src/two_brain_audit/reconciler.py +71 -0
- two_brain_audit-0.1.0/src/two_brain_audit/sidecar.py +140 -0
- two_brain_audit-0.1.0/src/two_brain_audit/tiers.py +50 -0
- two_brain_audit-0.1.0/tests/__init__.py +0 -0
- two_brain_audit-0.1.0/tests/test_db.py +32 -0
- two_brain_audit-0.1.0/tests/test_engine.py +131 -0
- two_brain_audit-0.1.0/tests/test_grades.py +56 -0
- two_brain_audit-0.1.0/tests/test_reconciler.py +47 -0
- two_brain_audit-0.1.0/tests/test_sidecar.py +58 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [master]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [master]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
os: [ubuntu-latest, windows-latest]
|
|
15
|
+
python-version: ["3.10", "3.12"]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install
|
|
25
|
+
run: pip install -e ".[dev]"
|
|
26
|
+
|
|
27
|
+
- name: Lint
|
|
28
|
+
run: ruff check src/ tests/
|
|
29
|
+
|
|
30
|
+
- name: Test
|
|
31
|
+
run: pytest --tb=short -q
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
id-token: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
publish:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
environment: pypi
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.12"
|
|
21
|
+
|
|
22
|
+
- name: Install build tools
|
|
23
|
+
run: pip install build
|
|
24
|
+
|
|
25
|
+
- name: Build
|
|
26
|
+
run: python -m build
|
|
27
|
+
|
|
28
|
+
- name: Publish to PyPI
|
|
29
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# IDE
|
|
16
|
+
.idea/
|
|
17
|
+
.vscode/
|
|
18
|
+
*.swp
|
|
19
|
+
*.swo
|
|
20
|
+
|
|
21
|
+
# Testing
|
|
22
|
+
.pytest_cache/
|
|
23
|
+
htmlcov/
|
|
24
|
+
.coverage
|
|
25
|
+
coverage.xml
|
|
26
|
+
|
|
27
|
+
# Type checking
|
|
28
|
+
.mypy_cache/
|
|
29
|
+
|
|
30
|
+
# Runtime artifacts
|
|
31
|
+
*.db
|
|
32
|
+
*.db-shm
|
|
33
|
+
*.db-wal
|
|
34
|
+
audit_baseline.json
|
|
35
|
+
demo_baseline.json
|
|
36
|
+
|
|
37
|
+
# OS
|
|
38
|
+
.DS_Store
|
|
39
|
+
Thumbs.db
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Max
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: two-brain-audit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Dual-layer audit system combining automated scoring (left brain) with manual qualitative grading (right brain) and reconciliation.
|
|
5
|
+
Project-URL: Homepage, https://github.com/maxtheman/two-brain-audit
|
|
6
|
+
Project-URL: Repository, https://github.com/maxtheman/two-brain-audit
|
|
7
|
+
Project-URL: Issues, https://github.com/maxtheman/two-brain-audit/issues
|
|
8
|
+
Author: Max
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: audit,grading,quality,reconciliation,scoring
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
21
|
+
Classifier: Topic :: Software Development :: Testing
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Provides-Extra: all
|
|
25
|
+
Requires-Dist: flask>=3.0; extra == 'all'
|
|
26
|
+
Requires-Dist: httpx>=0.27; extra == 'all'
|
|
27
|
+
Requires-Dist: pywebview>=5.0; extra == 'all'
|
|
28
|
+
Requires-Dist: semgrep>=1.60; extra == 'all'
|
|
29
|
+
Provides-Extra: dashboard
|
|
30
|
+
Requires-Dist: flask>=3.0; extra == 'dashboard'
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: flask>=3.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
34
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pywebview>=5.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
39
|
+
Requires-Dist: semgrep>=1.60; extra == 'dev'
|
|
40
|
+
Provides-Extra: github
|
|
41
|
+
Requires-Dist: httpx>=0.27; extra == 'github'
|
|
42
|
+
Provides-Extra: llm
|
|
43
|
+
Requires-Dist: httpx>=0.27; extra == 'llm'
|
|
44
|
+
Provides-Extra: native
|
|
45
|
+
Requires-Dist: flask>=3.0; extra == 'native'
|
|
46
|
+
Requires-Dist: pywebview>=5.0; extra == 'native'
|
|
47
|
+
Provides-Extra: semgrep
|
|
48
|
+
Requires-Dist: semgrep>=1.60; extra == 'semgrep'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# Two-Brain Audit
|
|
52
|
+
|
|
53
|
+
A dual-layer audit system that combines automated quantitative scoring (**left brain**) with manual qualitative grading (**right brain**) and reconciles them automatically.
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
LEFT BRAIN (Auto) RIGHT BRAIN (Manual)
|
|
57
|
+
───────────────── ────────────────────
|
|
58
|
+
pytest pass rate ──┐ ┌── Human grade (A)
|
|
59
|
+
ruff lint score ──┤ ├── LLM review findings
|
|
60
|
+
semgrep scan ──┤ ├── User feedback (4.2/5)
|
|
61
|
+
endpoint health ──┘ └── Team notes
|
|
62
|
+
│ │
|
|
63
|
+
▼ ▼
|
|
64
|
+
┌──────────┐
|
|
65
|
+
│RECONCILER│
|
|
66
|
+
└────┬─────┘
|
|
67
|
+
│
|
|
68
|
+
┌──────────┼──────────┐
|
|
69
|
+
▼ ▼ ▼
|
|
70
|
+
Aligned Diverged Failing
|
|
71
|
+
(green) (yellow) (red)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Why Two Brains?
|
|
75
|
+
|
|
76
|
+
| Scenario | Auto catches it | Manual catches it |
|
|
77
|
+
|----------|:-:|:-:|
|
|
78
|
+
| Test coverage drops silently | Yes | Maybe |
|
|
79
|
+
| "Feels slow" but metrics are fine | No | Yes |
|
|
80
|
+
| Stale manual grade after major refactor | Yes (divergence) | No |
|
|
81
|
+
| Security vuln in dependency | Yes (scanner) | No |
|
|
82
|
+
| UX regression that tests can't express | No | Yes |
|
|
83
|
+
| Reviewer optimism ("looks good to me") | Yes (cross-validation) | No |
|
|
84
|
+
|
|
85
|
+
## Quick Start
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install two-brain-audit
|
|
89
|
+
|
|
90
|
+
two-brain-audit init # create DB + baseline sidecar
|
|
91
|
+
two-brain-audit register --preset python # 8 dimensions for Python projects
|
|
92
|
+
two-brain-audit run light # first scan (~2s)
|
|
93
|
+
two-brain-audit status # view scores + divergences
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
Dimension Auto Grade Manual Status
|
|
98
|
+
-----------------------------------------------------------------
|
|
99
|
+
test_coverage 0.930 A — ok
|
|
100
|
+
lint_score 1.000 S — ok
|
|
101
|
+
type_coverage 0.720 B- — ok
|
|
102
|
+
security 0.500 D — ok
|
|
103
|
+
|
|
104
|
+
Overall: B+ (0.788)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Web Dashboard
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install "two-brain-audit[dashboard]"
|
|
111
|
+
two-brain-audit dashboard # http://localhost:8484/audit/
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Dark-mode UI with grade ring, score bars, divergence alerts, tier triggers, and a feedback widget. Zero external dependencies.
|
|
115
|
+
|
|
116
|
+
**[Full walkthrough with examples → docs/QUICKSTART.md](docs/QUICKSTART.md)**
|
|
117
|
+
|
|
118
|
+
## Features
|
|
119
|
+
|
|
120
|
+
- **12-grade scale** (S through F) with automatic score-to-grade conversion
|
|
121
|
+
- **4 audit tiers** — light (CI), medium (on-demand), daily (scheduled), weekly (deep scan)
|
|
122
|
+
- **Divergence detection** — auto vs manual disagreement surfaces automatically
|
|
123
|
+
- **Ratchet rules** — prevent silent score regression per dimension
|
|
124
|
+
- **User feedback** — star rating + free text, optionally classified by LLM
|
|
125
|
+
- **5 presets** — Python, REST API, Database, Infrastructure, ML Pipeline
|
|
126
|
+
- **4 integrations** — GitHub, semgrep, PyPI, Ollama (pluggable)
|
|
127
|
+
- **3 exporters** — JSON, CSV, Markdown reports
|
|
128
|
+
- **Web dashboard** — self-contained Flask blueprint, embed anywhere
|
|
129
|
+
- **CLI** — `init`, `run`, `status`, `health`, `export`, `dashboard`
|
|
130
|
+
- **CI-friendly** — `two-brain-audit health` returns exit code 0/1 + JSON
|
|
131
|
+
|
|
132
|
+
## Python API
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from two_brain_audit import AuditEngine, Dimension, Tier
|
|
136
|
+
|
|
137
|
+
engine = AuditEngine(db_path="audit.db", baseline_path="audit_baseline.json")
|
|
138
|
+
|
|
139
|
+
engine.register(Dimension(
|
|
140
|
+
name="test_coverage",
|
|
141
|
+
check=lambda: (passed / total, {"passed": passed, "total": total}),
|
|
142
|
+
confidence=0.95,
|
|
143
|
+
tier=Tier.LIGHT,
|
|
144
|
+
))
|
|
145
|
+
|
|
146
|
+
results = engine.run_tier("daily")
|
|
147
|
+
health = engine.health_check() # {"ok": True, "grade": "A", ...}
|
|
148
|
+
engine.record_feedback(score=0.8, text="Looking good")
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Flask Integration
|
|
152
|
+
|
|
153
|
+
```python
|
|
154
|
+
from two_brain_audit.dashboard import create_blueprint
|
|
155
|
+
app.register_blueprint(create_blueprint(engine), url_prefix="/audit")
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Presets
|
|
159
|
+
|
|
160
|
+
| Preset | Dimensions | Best for |
|
|
161
|
+
|--------|-----------|----------|
|
|
162
|
+
| `python` | test coverage, lint, types, deps, docs, security, complexity, imports | Python repos |
|
|
163
|
+
| `api` | endpoint health, latency, errors, auth, schema, rate limits, CORS, TLS | REST APIs |
|
|
164
|
+
| `database` | schema, indexes, queries, backups, replication, pool, migrations | Databases |
|
|
165
|
+
| `infrastructure` | uptime, certs, resources, config drift, secrets, DNS, CDN, containers | DevOps |
|
|
166
|
+
| `ml_pipeline` | model freshness, data drift, latency, accuracy, features, GPU, experiments | ML workflows |
|
|
167
|
+
|
|
168
|
+
## Docs
|
|
169
|
+
|
|
170
|
+
- **[Quickstart Guide](docs/QUICKSTART.md)** — step-by-step with examples
|
|
171
|
+
- **[Architecture](docs/ARCHITECTURE.md)** — design decisions and data flow
|
|
172
|
+
- **[examples/biged/](examples/biged/)** — 12-dimension reference implementation
|
|
173
|
+
|
|
174
|
+
## Origin
|
|
175
|
+
|
|
176
|
+
Extracted from [BigEd CC](https://github.com/maxtheman/Education) after production use on a 125-skill AI fleet with 12 audit dimensions, 4 tiers, and automated daily/weekly scheduling.
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Two-Brain Audit
|
|
2
|
+
|
|
3
|
+
A dual-layer audit system that combines automated quantitative scoring (**left brain**) with manual qualitative grading (**right brain**) and reconciles them automatically.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
LEFT BRAIN (Auto) RIGHT BRAIN (Manual)
|
|
7
|
+
───────────────── ────────────────────
|
|
8
|
+
pytest pass rate ──┐ ┌── Human grade (A)
|
|
9
|
+
ruff lint score ──┤ ├── LLM review findings
|
|
10
|
+
semgrep scan ──┤ ├── User feedback (4.2/5)
|
|
11
|
+
endpoint health ──┘ └── Team notes
|
|
12
|
+
│ │
|
|
13
|
+
▼ ▼
|
|
14
|
+
┌──────────┐
|
|
15
|
+
│RECONCILER│
|
|
16
|
+
└────┬─────┘
|
|
17
|
+
│
|
|
18
|
+
┌──────────┼──────────┐
|
|
19
|
+
▼ ▼ ▼
|
|
20
|
+
Aligned Diverged Failing
|
|
21
|
+
(green) (yellow) (red)
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Why Two Brains?
|
|
25
|
+
|
|
26
|
+
| Scenario | Auto catches it | Manual catches it |
|
|
27
|
+
|----------|:-:|:-:|
|
|
28
|
+
| Test coverage drops silently | Yes | Maybe |
|
|
29
|
+
| "Feels slow" but metrics are fine | No | Yes |
|
|
30
|
+
| Stale manual grade after major refactor | Yes (divergence) | No |
|
|
31
|
+
| Security vuln in dependency | Yes (scanner) | No |
|
|
32
|
+
| UX regression that tests can't express | No | Yes |
|
|
33
|
+
| Reviewer optimism ("looks good to me") | Yes (cross-validation) | No |
|
|
34
|
+
|
|
35
|
+
## Quick Start
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install two-brain-audit
|
|
39
|
+
|
|
40
|
+
two-brain-audit init # create DB + baseline sidecar
|
|
41
|
+
two-brain-audit register --preset python # 8 dimensions for Python projects
|
|
42
|
+
two-brain-audit run light # first scan (~2s)
|
|
43
|
+
two-brain-audit status # view scores + divergences
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
Dimension Auto Grade Manual Status
|
|
48
|
+
-----------------------------------------------------------------
|
|
49
|
+
test_coverage 0.930 A — ok
|
|
50
|
+
lint_score 1.000 S — ok
|
|
51
|
+
type_coverage 0.720 B- — ok
|
|
52
|
+
security 0.500 D — ok
|
|
53
|
+
|
|
54
|
+
Overall: B+ (0.788)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Web Dashboard
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install "two-brain-audit[dashboard]"
|
|
61
|
+
two-brain-audit dashboard # http://localhost:8484/audit/
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Dark-mode UI with grade ring, score bars, divergence alerts, tier triggers, and a feedback widget. Zero external dependencies.
|
|
65
|
+
|
|
66
|
+
**[Full walkthrough with examples → docs/QUICKSTART.md](docs/QUICKSTART.md)**
|
|
67
|
+
|
|
68
|
+
## Features
|
|
69
|
+
|
|
70
|
+
- **12-grade scale** (S through F) with automatic score-to-grade conversion
|
|
71
|
+
- **4 audit tiers** — light (CI), medium (on-demand), daily (scheduled), weekly (deep scan)
|
|
72
|
+
- **Divergence detection** — auto vs manual disagreement surfaces automatically
|
|
73
|
+
- **Ratchet rules** — prevent silent score regression per dimension
|
|
74
|
+
- **User feedback** — star rating + free text, optionally classified by LLM
|
|
75
|
+
- **5 presets** — Python, REST API, Database, Infrastructure, ML Pipeline
|
|
76
|
+
- **4 integrations** — GitHub, semgrep, PyPI, Ollama (pluggable)
|
|
77
|
+
- **3 exporters** — JSON, CSV, Markdown reports
|
|
78
|
+
- **Web dashboard** — self-contained Flask blueprint, embed anywhere
|
|
79
|
+
- **CLI** — `init`, `run`, `status`, `health`, `export`, `dashboard`
|
|
80
|
+
- **CI-friendly** — `two-brain-audit health` returns exit code 0/1 + JSON
|
|
81
|
+
|
|
82
|
+
## Python API
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from two_brain_audit import AuditEngine, Dimension, Tier
|
|
86
|
+
|
|
87
|
+
engine = AuditEngine(db_path="audit.db", baseline_path="audit_baseline.json")
|
|
88
|
+
|
|
89
|
+
engine.register(Dimension(
|
|
90
|
+
name="test_coverage",
|
|
91
|
+
check=lambda: (passed / total, {"passed": passed, "total": total}),
|
|
92
|
+
confidence=0.95,
|
|
93
|
+
tier=Tier.LIGHT,
|
|
94
|
+
))
|
|
95
|
+
|
|
96
|
+
results = engine.run_tier("daily")
|
|
97
|
+
health = engine.health_check() # {"ok": True, "grade": "A", ...}
|
|
98
|
+
engine.record_feedback(score=0.8, text="Looking good")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Flask Integration
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from two_brain_audit.dashboard import create_blueprint
|
|
105
|
+
app.register_blueprint(create_blueprint(engine), url_prefix="/audit")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Presets
|
|
109
|
+
|
|
110
|
+
| Preset | Dimensions | Best for |
|
|
111
|
+
|--------|-----------|----------|
|
|
112
|
+
| `python` | test coverage, lint, types, deps, docs, security, complexity, imports | Python repos |
|
|
113
|
+
| `api` | endpoint health, latency, errors, auth, schema, rate limits, CORS, TLS | REST APIs |
|
|
114
|
+
| `database` | schema, indexes, queries, backups, replication, pool, migrations | Databases |
|
|
115
|
+
| `infrastructure` | uptime, certs, resources, config drift, secrets, DNS, CDN, containers | DevOps |
|
|
116
|
+
| `ml_pipeline` | model freshness, data drift, latency, accuracy, features, GPU, experiments | ML workflows |
|
|
117
|
+
|
|
118
|
+
## Docs
|
|
119
|
+
|
|
120
|
+
- **[Quickstart Guide](docs/QUICKSTART.md)** — step-by-step with examples
|
|
121
|
+
- **[Architecture](docs/ARCHITECTURE.md)** — design decisions and data flow
|
|
122
|
+
- **[examples/biged/](examples/biged/)** — 12-dimension reference implementation
|
|
123
|
+
|
|
124
|
+
## Origin
|
|
125
|
+
|
|
126
|
+
Extracted from [BigEd CC](https://github.com/maxtheman/Education) after production use on a 125-skill AI fleet with 12 audit dimensions, 4 tiers, and automated daily/weekly scheduling.
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
## Design Philosophy
|
|
4
|
+
|
|
5
|
+
The two-brain audit system is built on one core insight: **neither automated scoring nor manual review alone is sufficient**. Automated checks catch drift, regressions, and known-bad patterns with high reliability but can't assess "feel" or strategic fit. Manual reviews catch what code can't express, but are subject to optimism bias, staleness, and inconsistency.
|
|
6
|
+
|
|
7
|
+
The two-brain pattern combines both and adds **reconciliation with teeth**: when the brains disagree, the system surfaces that disagreement rather than silently choosing a winner.
|
|
8
|
+
|
|
9
|
+
## Architecture Overview
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
┌──────────────────────────────────────────────────────────┐
|
|
13
|
+
│ AuditEngine │
|
|
14
|
+
│ │
|
|
15
|
+
│ ┌─────────────┐ ┌───────────┐ ┌──────────────────┐ │
|
|
16
|
+
│ │ Dimension │ │ Sidecar │ │ Reconciler │ │
|
|
17
|
+
│ │ Registry │ │ (.json) │ │ │ │
|
|
18
|
+
│ │ │ │ │ │ divergence │ │
|
|
19
|
+
│ │ check()→ │ │ manual │ │ detection │ │
|
|
20
|
+
│ │ (score, │ │ grades │ │ ratchet check │ │
|
|
21
|
+
│ │ detail) │ │ ratchets │ │ weekly merge │ │
|
|
22
|
+
│ │ │ │ feedback │ │ status classify │ │
|
|
23
|
+
│ └──────┬───────┘ └─────┬─────┘ └────────┬─────────┘ │
|
|
24
|
+
│ │ │ │ │
|
|
25
|
+
│ └────────┬───────┘──────────────────┘ │
|
|
26
|
+
│ │ │
|
|
27
|
+
│ ┌────────▼────────┐ │
|
|
28
|
+
│ │ AuditDB │ │
|
|
29
|
+
│ │ (SQLite) │ │
|
|
30
|
+
│ │ │ │
|
|
31
|
+
│ │ audit_scores │ │
|
|
32
|
+
│ │ user_feedback │ │
|
|
33
|
+
│ └─────────────────┘ │
|
|
34
|
+
└──────────────────────────────────────────────────────────┘
|
|
35
|
+
│ │ │
|
|
36
|
+
┌────▼────┐ ┌─────▼─────┐ ┌────▼────┐
|
|
37
|
+
│ CLI │ │ Dashboard │ │ Export │
|
|
38
|
+
│ │ │ (Flask) │ │ JSON/ │
|
|
39
|
+
│ init │ │ │ │ CSV/ │
|
|
40
|
+
│ run │ │ REST API │ │ Markdown│
|
|
41
|
+
│ status │ │ │ │ │
|
|
42
|
+
└─────────┘ └───────────┘ └─────────┘
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Key Design Decisions
|
|
46
|
+
|
|
47
|
+
### 1. Dimensions are callables, not configs
|
|
48
|
+
|
|
49
|
+
A dimension's `check` is a plain `Callable[[], tuple[float, dict]]`. No YAML schemas, no special DSL. This means:
|
|
50
|
+
- Any Python function can be a check
|
|
51
|
+
- Checks can call subprocess, HTTP, DB, or pure logic
|
|
52
|
+
- Testing is trivial (mock the callable)
|
|
53
|
+
- No serialization/deserialization overhead
|
|
54
|
+
|
|
55
|
+
### 2. Sidecar over DB for manual grades
|
|
56
|
+
|
|
57
|
+
Manual grades live in `audit_baseline.json`, not the database. Why:
|
|
58
|
+
- **Git-trackable** — diffs show grade changes in code review
|
|
59
|
+
- **Human-editable** — no special tooling needed to update
|
|
60
|
+
- **Atomic** — read the whole file, write the whole file (no partial states)
|
|
61
|
+
- **Portable** — copy between environments without DB migration
|
|
62
|
+
|
|
63
|
+
The DB stores the time-series of auto scores and reconciliation results. The sidecar stores the current truth of manual assessment.
|
|
64
|
+
|
|
65
|
+
### 3. Tier hierarchy is inclusive
|
|
66
|
+
|
|
67
|
+
Each tier includes all checks from lower tiers:
|
|
68
|
+
```
|
|
69
|
+
weekly ⊃ daily ⊃ medium ⊃ light
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
This means running `daily` also runs all `light` and `medium` checks. There's no way to run "only daily checks" because the lower tiers provide the foundation that daily reconciliation needs.
|
|
73
|
+
|
|
74
|
+
### 4. Divergence requires confidence
|
|
75
|
+
|
|
76
|
+
A divergence is only flagged when:
|
|
77
|
+
```
|
|
78
|
+
abs(auto_score - manual_score) > 0.15 AND auto_confidence >= 0.5
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Low-confidence dimensions (like `usability_ux` at 0.30) can't trigger divergence because we don't trust the auto score enough to contradict a human reviewer. The confidence floor prevents noisy alerts.
|
|
82
|
+
|
|
83
|
+
### 5. Three resolution paths (not just two)
|
|
84
|
+
|
|
85
|
+
Most audit systems offer "fix it" or "ignore it". We add a third:
|
|
86
|
+
1. **Update manual grade** — acknowledge the auto score is right
|
|
87
|
+
2. **Acknowledge** — dismiss without changing (visible but dimmed)
|
|
88
|
+
3. **LLM review** — get a second opinion from an external model
|
|
89
|
+
|
|
90
|
+
The LLM review path is valuable because it resolves the "who's right?" question without requiring a full human re-audit. It's a structured single-call prompt, not a conversation.
|
|
91
|
+
|
|
92
|
+
### 6. Ratchets are advisory by default
|
|
93
|
+
|
|
94
|
+
Ratchets prevent silent regression: once you declare "testing should be at least A", a drop below that floor is flagged. But in v0.1, ratchets produce WARN, not FAIL.
|
|
95
|
+
|
|
96
|
+
This is deliberate — new users shouldn't have their CI broken by a feature they just set up. Promote to FAIL per-dimension after tuning the system for your codebase.
|
|
97
|
+
|
|
98
|
+
### 7. Presets are starting points, not constraints
|
|
99
|
+
|
|
100
|
+
Presets provide dimension definitions with stub check functions. The expectation is:
|
|
101
|
+
- Use a preset to get started fast
|
|
102
|
+
- Replace stub checks with real implementations for your project
|
|
103
|
+
- Add/remove dimensions as needed
|
|
104
|
+
|
|
105
|
+
A preset is a Python list of `Dimension` objects, not a locked configuration.
|
|
106
|
+
|
|
107
|
+
### 8. Integrations are optional and pluggable
|
|
108
|
+
|
|
109
|
+
Each integration (GitHub, semgrep, PyPI, Ollama) is:
|
|
110
|
+
- A separate module with its own dependencies
|
|
111
|
+
- Configured via `configure(**kwargs)` (no global config file)
|
|
112
|
+
- Provides `checks()` that return callables wire-able to dimensions
|
|
113
|
+
|
|
114
|
+
If you don't install `two-brain-audit[github]`, the GitHub integration simply isn't available. No broken imports, no missing-dep errors at runtime.
|
|
115
|
+
|
|
116
|
+
## Data Flow
|
|
117
|
+
|
|
118
|
+
### Scoring Run
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
1. Engine.run_tier("daily")
|
|
122
|
+
2. For each dimension where tier ≤ requested tier:
|
|
123
|
+
a. Call dimension.check() → (score, detail)
|
|
124
|
+
b. Clamp score to [0.0, 1.0]
|
|
125
|
+
c. Load manual grade from sidecar
|
|
126
|
+
d. Compare: divergent if gap > 0.15 AND confidence ≥ 0.5
|
|
127
|
+
e. Write DimensionResult to audit_scores table
|
|
128
|
+
3. Return list of DimensionResult
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Reconciliation
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
1. A scheduler (cron, CI, or the origin project's "Dr. Ders" agent) triggers the daily run at 3:00 AM
|
|
135
|
+
2. Run all dimensions up to daily tier
|
|
136
|
+
3. For each dimension with divergence=1 AND acknowledged=0:
|
|
137
|
+
a. Push SSE alert (if dashboard is running)
|
|
138
|
+
b. Flag in smoke test output
|
|
139
|
+
4. Check ratchet targets — flag any below floor
|
|
140
|
+
5. Aggregate user feedback into sidecar
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Feedback Loop
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
1. User submits feedback (stars/slider + text)
|
|
147
|
+
2. Score stored in user_feedback table
|
|
148
|
+
3. (Optional) Text classified by LLM into dimensions
|
|
149
|
+
4. Daily reconciliation aggregates into sidecar per-dimension
|
|
150
|
+
5. UX confidence adjusts: min(0.75, 0.30 + feedback_count/100 * 0.45)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## File Layout
|
|
154
|
+
|
|
155
|
+
```
|
|
156
|
+
two-brain-audit/
|
|
157
|
+
├── src/two_brain_audit/
|
|
158
|
+
│ ├── __init__.py # Public API exports
|
|
159
|
+
│ ├── engine.py # AuditEngine, Dimension, DimensionResult
|
|
160
|
+
│ ├── db.py # SQLite storage (audit_scores, user_feedback)
|
|
161
|
+
│ ├── sidecar.py # JSON sidecar read/write
|
|
162
|
+
│ ├── grades.py # Grade ↔ score conversion
|
|
163
|
+
│ ├── tiers.py # Tier enum + scheduling
|
|
164
|
+
│ ├── reconciler.py # Weekly merge, ratchet check, status classify
|
|
165
|
+
│ ├── feedback.py # LLMClassifier protocol, conversion helpers
|
|
166
|
+
│ ├── cli.py # CLI entry point
|
|
167
|
+
│ ├── dashboard/ # Optional Flask blueprint
|
|
168
|
+
│ └── exporters/ # JSON, CSV, Markdown report generators
|
|
169
|
+
├── presets/ # Dimension configs per project type
|
|
170
|
+
│ ├── python_project.py # 8 dimensions for Python repos
|
|
171
|
+
│ ├── api_service.py # 8 dimensions for REST APIs
|
|
172
|
+
│ ├── database.py # 7 dimensions for database health
|
|
173
|
+
│ ├── infrastructure.py # 8 dimensions for DevOps/infra
|
|
174
|
+
│ └── ml_pipeline.py # 7 dimensions for ML workflows
|
|
175
|
+
├── integrations/ # Pluggable external data sources
|
|
176
|
+
│ ├── github.py # CI status, open bugs, stale PRs
|
|
177
|
+
│ ├── semgrep.py # SAST security scanning
|
|
178
|
+
│ ├── pypi.py # Dependency version drift
|
|
179
|
+
│ └── ollama.py # Local model health
|
|
180
|
+
├── examples/biged/ # Reference implementation (12 dimensions)
|
|
181
|
+
├── tests/ # pytest test suite
|
|
182
|
+
├── docs/ # This file + future guides
|
|
183
|
+
├── pyproject.toml # Build config, deps, tool settings
|
|
184
|
+
└── README.md # User-facing documentation
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Origin
|
|
188
|
+
|
|
189
|
+
Extracted from BigEd CC (`github.com/maxtheman/Education`) after production use on a 125-skill AI fleet. The BigEd implementation lives in `fleet/audit_scorer.py` (1,088 lines) and uses all 12 dimensions across 4 tiers with Dr. Ders scheduling daily/weekly runs.
|
|
190
|
+
|
|
191
|
+
The extraction preserves the battle-tested core while making it configurable for any project type.
|