two-brain-audit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. two_brain_audit-0.1.0/.github/workflows/ci.yml +31 -0
  2. two_brain_audit-0.1.0/.github/workflows/publish.yml +29 -0
  3. two_brain_audit-0.1.0/.gitignore +39 -0
  4. two_brain_audit-0.1.0/LICENSE +21 -0
  5. two_brain_audit-0.1.0/PKG-INFO +180 -0
  6. two_brain_audit-0.1.0/README.md +130 -0
  7. two_brain_audit-0.1.0/docs/ARCHITECTURE.md +191 -0
  8. two_brain_audit-0.1.0/docs/QUICKSTART.md +341 -0
  9. two_brain_audit-0.1.0/examples/biged/__init__.py +11 -0
  10. two_brain_audit-0.1.0/examples/biged/dimensions.py +141 -0
  11. two_brain_audit-0.1.0/integrations/__init__.py +26 -0
  12. two_brain_audit-0.1.0/integrations/github.py +99 -0
  13. two_brain_audit-0.1.0/integrations/ollama.py +46 -0
  14. two_brain_audit-0.1.0/integrations/pypi.py +100 -0
  15. two_brain_audit-0.1.0/integrations/semgrep.py +82 -0
  16. two_brain_audit-0.1.0/presets/__init__.py +18 -0
  17. two_brain_audit-0.1.0/presets/api_service.py +69 -0
  18. two_brain_audit-0.1.0/presets/database.py +62 -0
  19. two_brain_audit-0.1.0/presets/infrastructure.py +69 -0
  20. two_brain_audit-0.1.0/presets/ml_pipeline.py +62 -0
  21. two_brain_audit-0.1.0/presets/python_project.py +135 -0
  22. two_brain_audit-0.1.0/pyproject.toml +74 -0
  23. two_brain_audit-0.1.0/run_demo.py +32 -0
  24. two_brain_audit-0.1.0/src/two_brain_audit/__init__.py +17 -0
  25. two_brain_audit-0.1.0/src/two_brain_audit/__main__.py +3 -0
  26. two_brain_audit-0.1.0/src/two_brain_audit/app.py +115 -0
  27. two_brain_audit-0.1.0/src/two_brain_audit/cli.py +208 -0
  28. two_brain_audit-0.1.0/src/two_brain_audit/dashboard/__init__.py +126 -0
  29. two_brain_audit-0.1.0/src/two_brain_audit/dashboard/ui.py +829 -0
  30. two_brain_audit-0.1.0/src/two_brain_audit/db.py +222 -0
  31. two_brain_audit-0.1.0/src/two_brain_audit/engine.py +275 -0
  32. two_brain_audit-0.1.0/src/two_brain_audit/exporters/__init__.py +7 -0
  33. two_brain_audit-0.1.0/src/two_brain_audit/exporters/csv_export.py +35 -0
  34. two_brain_audit-0.1.0/src/two_brain_audit/exporters/json_export.py +41 -0
  35. two_brain_audit-0.1.0/src/two_brain_audit/exporters/markdown_export.py +54 -0
  36. two_brain_audit-0.1.0/src/two_brain_audit/feedback.py +45 -0
  37. two_brain_audit-0.1.0/src/two_brain_audit/grades.py +47 -0
  38. two_brain_audit-0.1.0/src/two_brain_audit/py.typed +0 -0
  39. two_brain_audit-0.1.0/src/two_brain_audit/reconciler.py +71 -0
  40. two_brain_audit-0.1.0/src/two_brain_audit/sidecar.py +140 -0
  41. two_brain_audit-0.1.0/src/two_brain_audit/tiers.py +50 -0
  42. two_brain_audit-0.1.0/tests/__init__.py +0 -0
  43. two_brain_audit-0.1.0/tests/test_db.py +32 -0
  44. two_brain_audit-0.1.0/tests/test_engine.py +131 -0
  45. two_brain_audit-0.1.0/tests/test_grades.py +56 -0
  46. two_brain_audit-0.1.0/tests/test_reconciler.py +47 -0
  47. two_brain_audit-0.1.0/tests/test_sidecar.py +58 -0
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+ branches: [master]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest, windows-latest]
15
+ python-version: ["3.10", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install
25
+ run: pip install -e ".[dev]"
26
+
27
+ - name: Lint
28
+ run: ruff check src/ tests/
29
+
30
+ - name: Test
31
+ run: pytest --tb=short -q
@@ -0,0 +1,29 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ permissions:
9
+ id-token: write
10
+
11
+ jobs:
12
+ publish:
13
+ runs-on: ubuntu-latest
14
+ environment: pypi
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - uses: actions/setup-python@v5
19
+ with:
20
+ python-version: "3.12"
21
+
22
+ - name: Install build tools
23
+ run: pip install build
24
+
25
+ - name: Build
26
+ run: python -m build
27
+
28
+ - name: Publish to PyPI
29
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,39 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ dist/
7
+ build/
8
+ *.egg
9
+
10
+ # Virtual environments
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # IDE
16
+ .idea/
17
+ .vscode/
18
+ *.swp
19
+ *.swo
20
+
21
+ # Testing
22
+ .pytest_cache/
23
+ htmlcov/
24
+ .coverage
25
+ coverage.xml
26
+
27
+ # Type checking
28
+ .mypy_cache/
29
+
30
+ # Runtime artifacts
31
+ *.db
32
+ *.db-shm
33
+ *.db-wal
34
+ audit_baseline.json
35
+ demo_baseline.json
36
+
37
+ # OS
38
+ .DS_Store
39
+ Thumbs.db
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Max
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,180 @@
1
+ Metadata-Version: 2.4
2
+ Name: two-brain-audit
3
+ Version: 0.1.0
4
+ Summary: Dual-layer audit system combining automated scoring (left brain) with manual qualitative grading (right brain) and reconciliation.
5
+ Project-URL: Homepage, https://github.com/maxtheman/two-brain-audit
6
+ Project-URL: Repository, https://github.com/maxtheman/two-brain-audit
7
+ Project-URL: Issues, https://github.com/maxtheman/two-brain-audit/issues
8
+ Author: Max
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: audit,grading,quality,reconciliation,scoring
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Quality Assurance
21
+ Classifier: Topic :: Software Development :: Testing
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.10
24
+ Provides-Extra: all
25
+ Requires-Dist: flask>=3.0; extra == 'all'
26
+ Requires-Dist: httpx>=0.27; extra == 'all'
27
+ Requires-Dist: pywebview>=5.0; extra == 'all'
28
+ Requires-Dist: semgrep>=1.60; extra == 'all'
29
+ Provides-Extra: dashboard
30
+ Requires-Dist: flask>=3.0; extra == 'dashboard'
31
+ Provides-Extra: dev
32
+ Requires-Dist: flask>=3.0; extra == 'dev'
33
+ Requires-Dist: httpx>=0.27; extra == 'dev'
34
+ Requires-Dist: mypy>=1.10; extra == 'dev'
35
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
36
+ Requires-Dist: pytest>=8.0; extra == 'dev'
37
+ Requires-Dist: pywebview>=5.0; extra == 'dev'
38
+ Requires-Dist: ruff>=0.4; extra == 'dev'
39
+ Requires-Dist: semgrep>=1.60; extra == 'dev'
40
+ Provides-Extra: github
41
+ Requires-Dist: httpx>=0.27; extra == 'github'
42
+ Provides-Extra: llm
43
+ Requires-Dist: httpx>=0.27; extra == 'llm'
44
+ Provides-Extra: native
45
+ Requires-Dist: flask>=3.0; extra == 'native'
46
+ Requires-Dist: pywebview>=5.0; extra == 'native'
47
+ Provides-Extra: semgrep
48
+ Requires-Dist: semgrep>=1.60; extra == 'semgrep'
49
+ Description-Content-Type: text/markdown
50
+
51
+ # Two-Brain Audit
52
+
53
+ A dual-layer audit system that combines automated quantitative scoring (**left brain**) with manual qualitative grading (**right brain**) and reconciles them automatically.
54
+
55
+ ```
56
+ LEFT BRAIN (Auto) RIGHT BRAIN (Manual)
57
+ ───────────────── ────────────────────
58
+ pytest pass rate ──┐ ┌── Human grade (A)
59
+ ruff lint score ──┤ ├── LLM review findings
60
+ semgrep scan ──┤ ├── User feedback (4.2/5)
61
+ endpoint health ──┘ └── Team notes
62
+ │ │
63
+ ▼ ▼
64
+ ┌──────────┐
65
+ │RECONCILER│
66
+ └────┬─────┘
67
+
68
+ ┌──────────┼──────────┐
69
+ ▼ ▼ ▼
70
+ Aligned Diverged Failing
71
+ (green) (yellow) (red)
72
+ ```
73
+
74
+ ## Why Two Brains?
75
+
76
+ | Scenario | Auto catches it | Manual catches it |
77
+ |----------|:-:|:-:|
78
+ | Test coverage drops silently | Yes | Maybe |
79
+ | "Feels slow" but metrics are fine | No | Yes |
80
+ | Stale manual grade after major refactor | Yes (divergence) | No |
81
+ | Security vuln in dependency | Yes (scanner) | No |
82
+ | UX regression that tests can't express | No | Yes |
83
+ | Reviewer optimism ("looks good to me") | Yes (cross-validation) | No |
84
+
85
+ ## Quick Start
86
+
87
+ ```bash
88
+ pip install two-brain-audit
89
+
90
+ two-brain-audit init # create DB + baseline sidecar
91
+ two-brain-audit register --preset python # 8 dimensions for Python projects
92
+ two-brain-audit run light # first scan (~2s)
93
+ two-brain-audit status # view scores + divergences
94
+ ```
95
+
96
+ ```
97
+ Dimension Auto Grade Manual Status
98
+ -----------------------------------------------------------------
99
+ test_coverage 0.930 A — ok
100
+ lint_score 1.000 S — ok
101
+ type_coverage 0.720 B- — ok
102
+ security 0.500 D — ok
103
+
104
+ Overall: B+ (0.788)
105
+ ```
106
+
107
+ ### Web Dashboard
108
+
109
+ ```bash
110
+ pip install two-brain-audit[dashboard]
111
+ two-brain-audit dashboard # http://localhost:8484/audit/
112
+ ```
113
+
114
+ Dark-mode UI with grade ring, score bars, divergence alerts, tier triggers, and a feedback widget. No external dependencies beyond Flask itself.
115
+
116
+ **[Full walkthrough with examples → docs/QUICKSTART.md](docs/QUICKSTART.md)**
117
+
118
+ ## Features
119
+
120
+ - **12-grade scale** (S through F) with automatic score-to-grade conversion
121
+ - **4 audit tiers** — light (CI), medium (on-demand), daily (scheduled), weekly (deep scan)
122
+ - **Divergence detection** — auto vs manual disagreement surfaces automatically
123
+ - **Ratchet rules** — prevent silent score regression per dimension
124
+ - **User feedback** — star rating + free text, optionally classified by LLM
125
+ - **5 presets** — Python, REST API, Database, Infrastructure, ML Pipeline
126
+ - **4 integrations** — GitHub, semgrep, PyPI, Ollama (pluggable)
127
+ - **3 exporters** — JSON, CSV, Markdown reports
128
+ - **Web dashboard** — self-contained Flask blueprint, embed anywhere
129
+ - **CLI** — `init`, `run`, `status`, `health`, `export`, `dashboard`
130
+ - **CI-friendly** — `two-brain-audit health` returns exit code 0/1 + JSON
131
+
132
+ ## Python API
133
+
134
+ ```python
135
+ from two_brain_audit import AuditEngine, Dimension, Tier
136
+
137
+ engine = AuditEngine(db_path="audit.db", baseline_path="audit_baseline.json")
138
+
139
+ engine.register(Dimension(
140
+ name="test_coverage",
141
+ check=lambda: (passed / total, {"passed": passed, "total": total}),
142
+ confidence=0.95,
143
+ tier=Tier.LIGHT,
144
+ ))
145
+
146
+ results = engine.run_tier("daily")
147
+ health = engine.health_check() # {"ok": True, "grade": "A", ...}
148
+ engine.record_feedback(score=0.8, text="Looking good")
149
+ ```
150
+
151
+ ## Flask Integration
152
+
153
+ ```python
154
+ from two_brain_audit.dashboard import create_blueprint
155
+ app.register_blueprint(create_blueprint(engine), url_prefix="/audit")
156
+ ```
157
+
158
+ ## Presets
159
+
160
+ | Preset | Dimensions | Best for |
161
+ |--------|-----------|----------|
162
+ | `python` | test coverage, lint, types, deps, docs, security, complexity, imports | Python repos |
163
+ | `api` | endpoint health, latency, errors, auth, schema, rate limits, CORS, TLS | REST APIs |
164
+ | `database` | schema, indexes, queries, backups, replication, pool, migrations | Databases |
165
+ | `infrastructure` | uptime, certs, resources, config drift, secrets, DNS, CDN, containers | DevOps |
166
+ | `ml_pipeline` | model freshness, data drift, latency, accuracy, features, GPU, experiments | ML workflows |
167
+
168
+ ## Docs
169
+
170
+ - **[Quickstart Guide](docs/QUICKSTART.md)** — step-by-step with examples
171
+ - **[Architecture](docs/ARCHITECTURE.md)** — design decisions and data flow
172
+ - **[examples/biged/](examples/biged/)** — 12-dimension reference implementation
173
+
174
+ ## Origin
175
+
176
+ Extracted from [BigEd CC](https://github.com/maxtheman/Education) after production use on a 125-skill AI fleet with 12 audit dimensions, 4 tiers, and automated daily/weekly scheduling.
177
+
178
+ ## License
179
+
180
+ MIT
@@ -0,0 +1,130 @@
1
+ # Two-Brain Audit
2
+
3
+ A dual-layer audit system that combines automated quantitative scoring (**left brain**) with manual qualitative grading (**right brain**) and reconciles them automatically.
4
+
5
+ ```
6
+ LEFT BRAIN (Auto) RIGHT BRAIN (Manual)
7
+ ───────────────── ────────────────────
8
+ pytest pass rate ──┐ ┌── Human grade (A)
9
+ ruff lint score ──┤ ├── LLM review findings
10
+ semgrep scan ──┤ ├── User feedback (4.2/5)
11
+ endpoint health ──┘ └── Team notes
12
+ │ │
13
+ ▼ ▼
14
+ ┌──────────┐
15
+ │RECONCILER│
16
+ └────┬─────┘
17
+
18
+ ┌──────────┼──────────┐
19
+ ▼ ▼ ▼
20
+ Aligned Diverged Failing
21
+ (green) (yellow) (red)
22
+ ```
23
+
24
+ ## Why Two Brains?
25
+
26
+ | Scenario | Auto catches it | Manual catches it |
27
+ |----------|:-:|:-:|
28
+ | Test coverage drops silently | Yes | Maybe |
29
+ | "Feels slow" but metrics are fine | No | Yes |
30
+ | Stale manual grade after major refactor | Yes (divergence) | No |
31
+ | Security vuln in dependency | Yes (scanner) | No |
32
+ | UX regression that tests can't express | No | Yes |
33
+ | Reviewer optimism ("looks good to me") | Yes (cross-validation) | No |
34
+
35
+ ## Quick Start
36
+
37
+ ```bash
38
+ pip install two-brain-audit
39
+
40
+ two-brain-audit init # create DB + baseline sidecar
41
+ two-brain-audit register --preset python # 8 dimensions for Python projects
42
+ two-brain-audit run light # first scan (~2s)
43
+ two-brain-audit status # view scores + divergences
44
+ ```
45
+
46
+ ```
47
+ Dimension Auto Grade Manual Status
48
+ -----------------------------------------------------------------
49
+ test_coverage 0.930 A — ok
50
+ lint_score 1.000 S — ok
51
+ type_coverage 0.720 B- — ok
52
+ security 0.500 D — ok
53
+
54
+ Overall: B+ (0.788)
55
+ ```
56
+
57
+ ### Web Dashboard
58
+
59
+ ```bash
60
+ pip install two-brain-audit[dashboard]
61
+ two-brain-audit dashboard # http://localhost:8484/audit/
62
+ ```
63
+
64
+ Dark-mode UI with grade ring, score bars, divergence alerts, tier triggers, and a feedback widget. No external dependencies beyond Flask itself.
65
+
66
+ **[Full walkthrough with examples → docs/QUICKSTART.md](docs/QUICKSTART.md)**
67
+
68
+ ## Features
69
+
70
+ - **12-grade scale** (S through F) with automatic score-to-grade conversion
71
+ - **4 audit tiers** — light (CI), medium (on-demand), daily (scheduled), weekly (deep scan)
72
+ - **Divergence detection** — auto vs manual disagreement surfaces automatically
73
+ - **Ratchet rules** — prevent silent score regression per dimension
74
+ - **User feedback** — star rating + free text, optionally classified by LLM
75
+ - **5 presets** — Python, REST API, Database, Infrastructure, ML Pipeline
76
+ - **4 integrations** — GitHub, semgrep, PyPI, Ollama (pluggable)
77
+ - **3 exporters** — JSON, CSV, Markdown reports
78
+ - **Web dashboard** — self-contained Flask blueprint, embed anywhere
79
+ - **CLI** — `init`, `run`, `status`, `health`, `export`, `dashboard`
80
+ - **CI-friendly** — `two-brain-audit health` returns exit code 0/1 + JSON
81
+
82
+ ## Python API
83
+
84
+ ```python
85
+ from two_brain_audit import AuditEngine, Dimension, Tier
86
+
87
+ engine = AuditEngine(db_path="audit.db", baseline_path="audit_baseline.json")
88
+
89
+ engine.register(Dimension(
90
+ name="test_coverage",
91
+ check=lambda: (passed / total, {"passed": passed, "total": total}),
92
+ confidence=0.95,
93
+ tier=Tier.LIGHT,
94
+ ))
95
+
96
+ results = engine.run_tier("daily")
97
+ health = engine.health_check() # {"ok": True, "grade": "A", ...}
98
+ engine.record_feedback(score=0.8, text="Looking good")
99
+ ```
100
+
101
+ ## Flask Integration
102
+
103
+ ```python
104
+ from two_brain_audit.dashboard import create_blueprint
105
+ app.register_blueprint(create_blueprint(engine), url_prefix="/audit")
106
+ ```
107
+
108
+ ## Presets
109
+
110
+ | Preset | Dimensions | Best for |
111
+ |--------|-----------|----------|
112
+ | `python` | test coverage, lint, types, deps, docs, security, complexity, imports | Python repos |
113
+ | `api` | endpoint health, latency, errors, auth, schema, rate limits, CORS, TLS | REST APIs |
114
+ | `database` | schema, indexes, queries, backups, replication, pool, migrations | Databases |
115
+ | `infrastructure` | uptime, certs, resources, config drift, secrets, DNS, CDN, containers | DevOps |
116
+ | `ml_pipeline` | model freshness, data drift, latency, accuracy, features, GPU, experiments | ML workflows |
117
+
118
+ ## Docs
119
+
120
+ - **[Quickstart Guide](docs/QUICKSTART.md)** — step-by-step with examples
121
+ - **[Architecture](docs/ARCHITECTURE.md)** — design decisions and data flow
122
+ - **[examples/biged/](examples/biged/)** — 12-dimension reference implementation
123
+
124
+ ## Origin
125
+
126
+ Extracted from [BigEd CC](https://github.com/maxtheman/Education) after production use on a 125-skill AI fleet with 12 audit dimensions, 4 tiers, and automated daily/weekly scheduling.
127
+
128
+ ## License
129
+
130
+ MIT
@@ -0,0 +1,191 @@
1
+ # Architecture
2
+
3
+ ## Design Philosophy
4
+
5
+ The two-brain audit system is built on one core insight: **neither automated scoring nor manual review alone is sufficient**. Automated checks catch drift, regressions, and known-bad patterns with high reliability but can't assess "feel" or strategic fit. Manual reviews catch what code can't express, but are subject to optimism bias, staleness, and inconsistency.
6
+
7
+ The two-brain pattern combines both and adds **reconciliation with teeth**: when the brains disagree, the system surfaces that disagreement rather than silently choosing a winner.
8
+
9
+ ## Architecture Overview
10
+
11
+ ```
12
+ ┌──────────────────────────────────────────────────────────┐
13
+ │ AuditEngine │
14
+ │ │
15
+ │ ┌─────────────┐ ┌───────────┐ ┌──────────────────┐ │
16
+ │ │ Dimension │ │ Sidecar │ │ Reconciler │ │
17
+ │ │ Registry │ │ (.json) │ │ │ │
18
+ │ │ │ │ │ │ divergence │ │
19
+ │ │ check()→ │ │ manual │ │ detection │ │
20
+ │ │ (score, │ │ grades │ │ ratchet check │ │
21
+ │ │ detail) │ │ ratchets │ │ weekly merge │ │
22
+ │ │ │ │ feedback │ │ status classify │ │
23
+ │ └──────┬───────┘ └─────┬─────┘ └────────┬─────────┘ │
24
+ │ │ │ │ │
25
+ │ └────────┬───────┘──────────────────┘ │
26
+ │ │ │
27
+ │ ┌────────▼────────┐ │
28
+ │ │ AuditDB │ │
29
+ │ │ (SQLite) │ │
30
+ │ │ │ │
31
+ │ │ audit_scores │ │
32
+ │ │ user_feedback │ │
33
+ │ └─────────────────┘ │
34
+ └──────────────────────────────────────────────────────────┘
35
+ │ │ │
36
+ ┌────▼────┐ ┌─────▼─────┐ ┌────▼────┐
37
+ │ CLI │ │ Dashboard │ │ Export │
38
+ │ │ │ (Flask) │ │ JSON/ │
39
+ │ init │ │ │ │ CSV/ │
40
+ │ run │ │ REST API │ │ Markdown│
41
+ │ status │ │ │ │ │
42
+ └─────────┘ └───────────┘ └─────────┘
43
+ ```
44
+
45
+ ## Key Design Decisions
46
+
47
+ ### 1. Dimensions are callables, not configs
48
+
49
+ A dimension's `check` is a plain `Callable[[], tuple[float, dict]]`. No YAML schemas, no special DSL. This means:
50
+ - Any Python function can be a check
51
+ - Checks can call subprocess, HTTP, DB, or pure logic
52
+ - Testing is trivial (mock the callable)
53
+ - No serialization/deserialization overhead
54
+
55
+ ### 2. Sidecar over DB for manual grades
56
+
57
+ Manual grades live in `audit_baseline.json`, not the database. Why:
58
+ - **Git-trackable** — diffs show grade changes in code review
59
+ - **Human-editable** — no special tooling needed to update
60
+ - **Atomic** — read the whole file, write the whole file (no partial states)
61
+ - **Portable** — copy between environments without DB migration
62
+
63
+ The DB stores the time-series of auto scores and reconciliation results. The sidecar stores the current truth of manual assessment.
64
+
65
+ ### 3. Tier hierarchy is inclusive
66
+
67
+ Each tier includes all checks from lower tiers:
68
+ ```
69
+ weekly ⊃ daily ⊃ medium ⊃ light
70
+ ```
71
+
72
+ This means running `daily` also runs all `light` and `medium` checks. There's no way to run "only daily checks" because the lower tiers provide the foundation that daily reconciliation needs.
73
+
74
+ ### 4. Divergence requires confidence
75
+
76
+ A divergence is only flagged when:
77
+ ```
78
+ abs(auto_score - manual_score) > 0.15 AND auto_confidence >= 0.5
79
+ ```
80
+
81
+ Low-confidence dimensions (like `usability_ux` at 0.30) can't trigger divergence because we don't trust the auto score enough to contradict a human reviewer. The confidence floor prevents noisy alerts.
82
+
83
+ ### 5. Three resolution paths (not just two)
84
+
85
+ Most audit systems offer "fix it" or "ignore it". We add a third:
86
+ 1. **Update manual grade** — acknowledge the auto score is right
87
+ 2. **Acknowledge** — dismiss without changing (visible but dimmed)
88
+ 3. **LLM review** — get a second opinion from an external model
89
+
90
+ The LLM review path is valuable because it resolves the "who's right?" question without requiring a full human re-audit. It's a structured single-call prompt, not a conversation.
91
+
92
+ ### 6. Ratchets are advisory by default
93
+
94
+ Ratchets prevent silent regression: once you declare "testing should be at least A", a drop below that floor is flagged. But in v0.1, ratchets produce WARN, not FAIL.
95
+
96
+ This is deliberate — new users shouldn't have their CI broken by a feature they just set up. Promote to FAIL per-dimension after tuning the system for your codebase.
97
+
98
+ ### 7. Presets are starting points, not constraints
99
+
100
+ Presets provide dimension definitions with stub check functions. The expectation is:
101
+ - Use a preset to get started fast
102
+ - Replace stub checks with real implementations for your project
103
+ - Add/remove dimensions as needed
104
+
105
+ A preset is a Python list of `Dimension` objects, not a locked configuration.
106
+
107
+ ### 8. Integrations are optional and pluggable
108
+
109
+ Each integration (GitHub, semgrep, PyPI, Ollama) is:
110
+ - A separate module with its own dependencies
111
+ - Configured via `configure(**kwargs)` (no global config file)
112
+ - A provider of `checks()` — callables that can be wired into dimensions
113
+
114
+ If you don't install `two-brain-audit[github]`, the GitHub integration simply isn't available. No broken imports, no missing-dep errors at runtime.
115
+
116
+ ## Data Flow
117
+
118
+ ### Scoring Run
119
+
120
+ ```
121
+ 1. Engine.run_tier("daily")
122
+ 2. For each dimension where tier ≤ requested tier:
123
+ a. Call dimension.check() → (score, detail)
124
+ b. Clamp score to [0.0, 1.0]
125
+ c. Load manual grade from sidecar
126
+ d. Compare: divergent if gap > 0.15 AND confidence ≥ 0.5
127
+ e. Write DimensionResult to audit_scores table
128
+ 3. Return list of DimensionResult
129
+ ```
130
+
131
+ ### Reconciliation
132
+
133
+ ```
134
+ 1. Dr. Ders / scheduler triggers daily run at 3:00 AM
135
+ 2. Run all dimensions up to daily tier
136
+ 3. For each dimension with divergence=1 AND acknowledged=0:
137
+ a. Push SSE alert (if dashboard is running)
138
+ b. Flag in smoke test output
139
+ 4. Check ratchet targets — flag any below floor
140
+ 5. Aggregate user feedback into sidecar
141
+ ```
142
+
143
+ ### Feedback Loop
144
+
145
+ ```
146
+ 1. User submits feedback (stars/slider + text)
147
+ 2. Score stored in user_feedback table
148
+ 3. (Optional) Text classified by LLM into dimensions
149
+ 4. Daily reconciliation aggregates into sidecar per-dimension
150
+ 5. UX confidence adjusts: min(0.75, 0.30 + feedback_count/100 * 0.45)
151
+ ```
152
+
153
+ ## File Layout
154
+
155
+ ```
156
+ two-brain-audit/
157
+ ├── src/two_brain_audit/
158
+ │ ├── __init__.py # Public API exports
159
+ │ ├── engine.py # AuditEngine, Dimension, DimensionResult
160
+ │ ├── db.py # SQLite storage (audit_scores, user_feedback)
161
+ │ ├── sidecar.py # JSON sidecar read/write
162
+ │ ├── grades.py # Grade ↔ score conversion
163
+ │ ├── tiers.py # Tier enum + scheduling
164
+ │ ├── reconciler.py # Weekly merge, ratchet check, status classify
165
+ │ ├── feedback.py # LLMClassifier protocol, conversion helpers
166
+ │ ├── cli.py # CLI entry point
167
+ │ ├── dashboard/ # Optional Flask blueprint
168
+ │ └── exporters/ # JSON, CSV, Markdown report generators
169
+ ├── presets/ # Dimension configs per project type
170
+ │ ├── python_project.py # 8 dimensions for Python repos
171
+ │ ├── api_service.py # 8 dimensions for REST APIs
172
+ │ ├── database.py # 7 dimensions for database health
173
+ │ ├── infrastructure.py # 8 dimensions for DevOps/infra
174
+ │ └── ml_pipeline.py # 7 dimensions for ML workflows
175
+ ├── integrations/ # Pluggable external data sources
176
+ │ ├── github.py # CI status, open bugs, stale PRs
177
+ │ ├── semgrep.py # SAST security scanning
178
+ │ ├── pypi.py # Dependency version drift
179
+ │ └── ollama.py # Local model health
180
+ ├── examples/biged/ # Reference implementation (12 dimensions)
181
+ ├── tests/ # pytest test suite
182
+ ├── docs/ # This file + future guides
183
+ ├── pyproject.toml # Build config, deps, tool settings
184
+ └── README.md # User-facing documentation
185
+ ```
186
+
187
+ ## Origin
188
+
189
+ Extracted from BigEd CC (`github.com/maxtheman/Education`) after production use on a 125-skill AI fleet. The BigEd implementation lives in `fleet/audit_scorer.py` (1,088 lines) and uses all 12 dimensions across 4 tiers with Dr. Ders scheduling daily/weekly runs.
190
+
191
+ The extraction preserves the battle-tested core while making it configurable for any project type.