tokencast 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokencast-0.1.0/.claude/commands/tokencostscope-version.md +1 -0
- tokencast-0.1.0/.claude/settings.json +53 -0
- tokencast-0.1.0/.claude/settings.local.json +46 -0
- tokencast-0.1.0/.claude/worktrees/pr-review-loop-modeling/.claude/settings.local.json +7 -0
- tokencast-0.1.0/.github/workflows/sync-wiki.yml +48 -0
- tokencast-0.1.0/.gitignore +4 -0
- tokencast-0.1.0/CLAUDE.md +83 -0
- tokencast-0.1.0/LICENSE +21 -0
- tokencast-0.1.0/PKG-INFO +197 -0
- tokencast-0.1.0/README.md +172 -0
- tokencast-0.1.0/ROADMAP.md +190 -0
- tokencast-0.1.0/SKILL.md +492 -0
- tokencast-0.1.0/assets/tokencast-logo.svg +48 -0
- tokencast-0.1.0/calibration/.gitkeep +0 -0
- tokencast-0.1.0/calibration/.midcheck-state +2 -0
- tokencast-0.1.0/calibration/last-estimate.md +17 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-architecture.md +336 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-plan-final.md +396 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-plan-v1.md +231 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-requirements.md +241 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-research.md +437 -0
- tokencast-0.1.0/calibration/planning/cache-write-modeling-staff-review.md +124 -0
- tokencast-0.1.0/calibration/planning/file-size-architecture.md +350 -0
- tokencast-0.1.0/calibration/planning/file-size-plan-final.md +1161 -0
- tokencast-0.1.0/calibration/planning/file-size-plan-v1.md +675 -0
- tokencast-0.1.0/calibration/planning/file-size-requirements.md +398 -0
- tokencast-0.1.0/calibration/planning/file-size-research.md +346 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-architecture.md +439 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-plan-final.md +771 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-plan-v1.md +593 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-requirements.md +231 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-research.md +299 -0
- tokencast-0.1.0/calibration/planning/per-step-factors-staff-review.md +200 -0
- tokencast-0.1.0/calibration/planning/v1.6-architecture-decision.md +436 -0
- tokencast-0.1.0/calibration/planning/v1.6-plan-final.md +1395 -0
- tokencast-0.1.0/calibration/planning/v1.6-plan-v1.md +1223 -0
- tokencast-0.1.0/calibration/planning/v2.1-architecture.md +337 -0
- tokencast-0.1.0/calibration/planning/v2.1-plan-final.md +588 -0
- tokencast-0.1.0/calibration/planning/v2.1-plan-initial.md +443 -0
- tokencast-0.1.0/calibration/planning/v2.1-requirements.md +114 -0
- tokencast-0.1.0/calibration/planning/v2.1-research.md +449 -0
- tokencast-0.1.0/calibration/planning/v2.1-review.md +156 -0
- tokencast-0.1.0/commands/tokencast-version.md +10 -0
- tokencast-0.1.0/docs/architecture-v1.7-v2.0.md +605 -0
- tokencast-0.1.0/docs/enterprise-strategy-adversarial-report.md +456 -0
- tokencast-0.1.0/docs/enterprise-strategy-review-questions.md +131 -0
- tokencast-0.1.0/docs/enterprise-strategy-v2.md +491 -0
- tokencast-0.1.0/docs/enterprise-strategy.md +235 -0
- tokencast-0.1.0/docs/phase-1-pm-review.md +484 -0
- tokencast-0.1.0/docs/phase-1b-mcp-stories.md +510 -0
- tokencast-0.1.0/docs/phase-1c-attribution-stories.md +369 -0
- tokencast-0.1.0/docs/plan-v1.7-v2.0-final.md +1253 -0
- tokencast-0.1.0/docs/plan-v1.7-v2.0-initial.md +785 -0
- tokencast-0.1.0/docs/requirements-v1.7-v2.0.md +254 -0
- tokencast-0.1.0/docs/superpowers/plans/2026-03-15-parallel-agent-accounting.md +817 -0
- tokencast-0.1.0/docs/superpowers/specs/2026-03-15-parallel-agent-accounting-design.md +216 -0
- tokencast-0.1.0/docs/wiki/Calibration.md +144 -0
- tokencast-0.1.0/docs/wiki/Configuration.md +242 -0
- tokencast-0.1.0/docs/wiki/Home.md +55 -0
- tokencast-0.1.0/docs/wiki/How-It-Works.md +262 -0
- tokencast-0.1.0/docs/wiki/Installation.md +71 -0
- tokencast-0.1.0/docs/wiki/Roadmap.md +93 -0
- tokencast-0.1.0/pyproject.toml +36 -0
- tokencast-0.1.0/references/calibration-algorithm.md +302 -0
- tokencast-0.1.0/references/examples.md +430 -0
- tokencast-0.1.0/references/heuristics.md +264 -0
- tokencast-0.1.0/references/pricing.md +49 -0
- tokencast-0.1.0/scripts/calibration_store.py +112 -0
- tokencast-0.1.0/scripts/disable.sh +86 -0
- tokencast-0.1.0/scripts/install-hooks.sh +131 -0
- tokencast-0.1.0/scripts/parse_last_estimate.py +205 -0
- tokencast-0.1.0/scripts/sum-session-tokens.py +422 -0
- tokencast-0.1.0/scripts/tokencast-agent-hook.sh +129 -0
- tokencast-0.1.0/scripts/tokencast-learn.sh +260 -0
- tokencast-0.1.0/scripts/tokencast-midcheck.sh +190 -0
- tokencast-0.1.0/scripts/tokencast-status.py +921 -0
- tokencast-0.1.0/scripts/tokencast-track.sh +51 -0
- tokencast-0.1.0/scripts/update-factors.py +344 -0
- tokencast-0.1.0/src/tokencast/__init__.py +3 -0
- tokencast-0.1.0/tests/test_agent_hook.py +1176 -0
- tokencast-0.1.0/tests/test_cache_write_modeling.py +470 -0
- tokencast-0.1.0/tests/test_calibration_store.py +279 -0
- tokencast-0.1.0/tests/test_continuation_session.py +583 -0
- tokencast-0.1.0/tests/test_decay_weighting.py +349 -0
- tokencast-0.1.0/tests/test_file_size_awareness.py +651 -0
- tokencast-0.1.0/tests/test_midcheck.py +499 -0
- tokencast-0.1.0/tests/test_parallel_agent_accounting.py +577 -0
- tokencast-0.1.0/tests/test_per_step_factors.py +998 -0
- tokencast-0.1.0/tests/test_pr_review_loop.py +395 -0
- tokencast-0.1.0/tests/test_signature_factors.py +347 -0
- tokencast-0.1.0/tests/test_status_analysis.py +723 -0
- tokencast-0.1.0/tests/test_update_factors_excluded.py +181 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/Volumes/Macintosh HD2/Cowork/Projects/costscope/commands/tokencostscope-version.md
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
{
|
|
2
|
+
"hooks": {
|
|
3
|
+
"Stop": [
|
|
4
|
+
{
|
|
5
|
+
"hooks": [
|
|
6
|
+
{
|
|
7
|
+
"type": "command",
|
|
8
|
+
"command": "bash '/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/tokencast-learn.sh'"
|
|
9
|
+
}
|
|
10
|
+
]
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"PostToolUse": [
|
|
14
|
+
{
|
|
15
|
+
"matcher": "Agent",
|
|
16
|
+
"hooks": [
|
|
17
|
+
{
|
|
18
|
+
"type": "command",
|
|
19
|
+
"command": "bash '/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/tokencast-track.sh'"
|
|
20
|
+
}
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"matcher": "Agent",
|
|
25
|
+
"hooks": [
|
|
26
|
+
{
|
|
27
|
+
"type": "command",
|
|
28
|
+
"command": "bash '/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/tokencast-agent-hook.sh'"
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
],
|
|
33
|
+
"PreToolUse": [
|
|
34
|
+
{
|
|
35
|
+
"hooks": [
|
|
36
|
+
{
|
|
37
|
+
"type": "command",
|
|
38
|
+
"command": "bash '/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/tokencast-midcheck.sh'"
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"matcher": "Agent",
|
|
44
|
+
"hooks": [
|
|
45
|
+
{
|
|
46
|
+
"type": "command",
|
|
47
|
+
"command": "bash '/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/tokencast-agent-hook.sh'"
|
|
48
|
+
}
|
|
49
|
+
]
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
|
53
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"mcp__claude_ai_Gmail__gmail_search_messages",
|
|
5
|
+
"mcp__claude_ai_Gmail__gmail_read_message",
|
|
6
|
+
"Bash(bash:*)",
|
|
7
|
+
"Bash(python3:*)",
|
|
8
|
+
"Bash(gh pr:*)",
|
|
9
|
+
"Skill(tokencostscope)",
|
|
10
|
+
"Skill(commit-commands:commit-push-pr)",
|
|
11
|
+
"Bash(/usr/bin/python3 -m py_compile scripts/update-factors.py)",
|
|
12
|
+
"mcp__codebase-memory-mcp__list_projects",
|
|
13
|
+
"mcp__codebase-memory-mcp__index_repository",
|
|
14
|
+
"Bash(xargs -0 ls -t)",
|
|
15
|
+
"Bash(/usr/bin/python3 \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/sum-session-tokens.py\" /Users/kellyl./.claude/projects/-Volumes-Macintosh-HD2-Cowork-Projects-costscope/b9eb66bb-94ad-4e79-aa6d-c99f1b9bffba.jsonl 0)",
|
|
16
|
+
"WebSearch",
|
|
17
|
+
"Bash(/usr/bin/python3 -c \"import ast; ast.parse\\(open\\(''/Volumes/Macintosh HD2/Cowork/Projects/costscope/tests/test_decay_weighting.py''\\).read\\(\\)\\); print\\(''OK: test_decay_weighting.py''\\)\")",
|
|
18
|
+
"Bash(/usr/bin/python3 -c \"import ast; ast.parse\\(open\\(''/Volumes/Macintosh HD2/Cowork/Projects/costscope/tests/test_signature_factors.py''\\).read\\(\\)\\); print\\(''OK: test_signature_factors.py''\\)\")",
|
|
19
|
+
"Bash(/usr/bin/python3 -c \"import ast; ast.parse\\(open\\(''/Volumes/Macintosh HD2/Cowork/Projects/costscope/tests/test_midcheck.py''\\).read\\(\\)\\); print\\(''OK: test_midcheck.py''\\)\")",
|
|
20
|
+
"Bash(/usr/bin/python3 -c \"import sys; sys.path.insert\\(0, ''scripts''\\); from update_factors import compute_decay_weights, compute_ewma, trimmed_mean, update_factors; print\\(''imports OK''\\)\")",
|
|
21
|
+
"Bash(chmod:*)",
|
|
22
|
+
"Bash(/usr/bin/python3 -c \":*)",
|
|
23
|
+
"Bash(timeout 1 cat)",
|
|
24
|
+
"Bash(/usr/bin/python3 -c \"import ast; ast.parse\\(open\\('/Volumes/Macintosh HD2/Cowork/Projects/costscope/tests/test_midcheck.py'\\).read\\(\\)\\); print\\('OK'\\)\")",
|
|
25
|
+
"mcp__claude_ai_open-brain__capture_thought",
|
|
26
|
+
"WebFetch(domain:github.com)",
|
|
27
|
+
"Bash(truncate -s 0 \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/calibration/history.jsonl\")",
|
|
28
|
+
"Bash(\"/Volumes/Macintosh HD2/Cowork/Projects/costscope/calibration/factors.json\")",
|
|
29
|
+
"Bash(/usr/bin/python3 \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/parse_last_estimate.py\" \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/calibration/last-estimate.md\" 2>&1)",
|
|
30
|
+
"Bash(/usr/bin/python3 \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/scripts/parse_last_estimate.py\" \"/Volumes/Macintosh HD2/Cowork/Projects/costscope/calibration/last-estimate.md\")",
|
|
31
|
+
"Bash(python3 -c \":*)",
|
|
32
|
+
"Bash(/opt/homebrew/bin/python3 --version)",
|
|
33
|
+
"Bash(/usr/bin/python3 --version)",
|
|
34
|
+
"Bash(ln -s \"/Volumes/Macintosh HD2/Cowork/Projects/costscope\" tokencast)",
|
|
35
|
+
"Bash(rm tokencostscope:*)",
|
|
36
|
+
"Bash(sed -i '' 's/## costscope estimate/## tokencast estimate/g' SKILL.md tests/test_cache_write_modeling.py docs/wiki/Home.md)",
|
|
37
|
+
"Bash(sed -i '' \"s/## costscope estimate \\(v1.x.x\\)/## tokencast estimate \\(v1.x.x\\)/g\" CLAUDE.md)",
|
|
38
|
+
"Bash(cd:*)",
|
|
39
|
+
"Bash(gh repo:*)",
|
|
40
|
+
"Bash(curl:*)",
|
|
41
|
+
"Bash(pip3 install:*)",
|
|
42
|
+
"Bash(pip3 --version)",
|
|
43
|
+
"Bash(/usr/bin/python3 -m build)"
|
|
44
|
+
]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
name: Sync Wiki
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
paths:
|
|
8
|
+
- "docs/wiki/**"
|
|
9
|
+
workflow_dispatch: # allow manual trigger
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
sync-wiki:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
permissions:
|
|
15
|
+
contents: write
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- name: Checkout repository
|
|
19
|
+
uses: actions/checkout@v4
|
|
20
|
+
|
|
21
|
+
- name: Push docs/wiki to GitHub Wiki
|
|
22
|
+
env:
|
|
23
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
24
|
+
REPO: ${{ github.repository }}
|
|
25
|
+
COMMIT_SHA: ${{ github.sha }}
|
|
26
|
+
run: |
|
|
27
|
+
# Clone the wiki repo (separate git repo at <repo>.wiki.git)
|
|
28
|
+
git clone \
|
|
29
|
+
"https://x-access-token:${GH_TOKEN}@github.com/${REPO}.wiki.git" \
|
|
30
|
+
wiki-repo
|
|
31
|
+
|
|
32
|
+
# Sync docs/wiki/ → wiki repo root (delete pages removed from docs/wiki/)
|
|
33
|
+
rsync -av --delete --exclude='.git' docs/wiki/ wiki-repo/
|
|
34
|
+
|
|
35
|
+
cd wiki-repo
|
|
36
|
+
|
|
37
|
+
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
38
|
+
git config user.name "github-actions[bot]"
|
|
39
|
+
|
|
40
|
+
git add -A
|
|
41
|
+
|
|
42
|
+
if git diff --staged --quiet; then
|
|
43
|
+
echo "No wiki changes to sync."
|
|
44
|
+
else
|
|
45
|
+
git commit -m "Sync wiki from docs/wiki @ ${COMMIT_SHA}"
|
|
46
|
+
git push
|
|
47
|
+
echo "Wiki synced successfully."
|
|
48
|
+
fi
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# tokencast
|
|
2
|
+
|
|
3
|
+
A Claude Code skill that automatically estimates Anthropic API token costs when a development plan is created, and learns from actual usage over time to improve accuracy via calibration factors.
|
|
4
|
+
|
|
5
|
+
## Repo
|
|
6
|
+
|
|
7
|
+
- GitHub: `krulewis/tokencast`
|
|
8
|
+
- Current version: 2.1.0
|
|
9
|
+
|
|
10
|
+
## Key Files
|
|
11
|
+
|
|
12
|
+
| Path | Purpose |
|
|
13
|
+
|------|---------|
|
|
14
|
+
| `SKILL.md` | Skill definition — activation rules, calculation algorithm, output template |
|
|
15
|
+
| `references/heuristics.md` | Token budgets, pipeline step decompositions, complexity multipliers, parallel discount parameters — all tunable parameters live here |
|
|
16
|
+
| `references/pricing.md` | Model pricing per million tokens, cache rates, step→model mapping |
|
|
17
|
+
| `references/calibration-algorithm.md` | Calibration algorithm documentation |
|
|
18
|
+
| `references/examples.md` | Worked estimation examples |
|
|
19
|
+
| `scripts/tokencast-learn.sh` | Stop hook — reads session JSONL at end of session, computes actuals, calls update-factors.py |
|
|
20
|
+
| `scripts/tokencast-midcheck.sh` | PreToolUse hook for mid-session cost warnings — checks spend vs pessimistic estimate |
|
|
21
|
+
| `scripts/update-factors.py` | Computes and persists calibration factors from completed session data |
|
|
22
|
+
| `scripts/sum-session-tokens.py` | Parses session JSONL to sum token costs |
|
|
23
|
+
| `calibration/` | Calibration data directory — gitignored; contains `active-estimate.json` and `factors.json` |
|
|
24
|
+
| `tests/test_pr_review_loop.py` | Tests for PR Review Loop cost modeling |
|
|
25
|
+
| `tests/test_parallel_agent_accounting.py` | Tests for parallel agent cost discounting |
|
|
26
|
+
| `tests/test_file_size_awareness.py` | Tests for file size bracket computation and auto-measurement |
|
|
27
|
+
| `docs/wiki/` | GitHub wiki source — Home, How-It-Works, Installation, Configuration, Calibration, Roadmap |
|
|
28
|
+
| `README.md` | Repo root README (not inside `.claude/skills/tokencast/`) |
|
|
29
|
+
|
|
30
|
+
## Test Commands
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Run all tests — use system Python 3.9 which has pytest
|
|
34
|
+
/usr/bin/python3 -m pytest tests/
|
|
35
|
+
|
|
36
|
+
# Run a specific test file
|
|
37
|
+
/usr/bin/python3 -m pytest tests/test_pr_review_loop.py
|
|
38
|
+
|
|
39
|
+
# Run with verbose output
|
|
40
|
+
/usr/bin/python3 -m pytest tests/ -v
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
**Do NOT use `pytest` or `python3 -m pytest` directly.** Homebrew `python3` resolves to 3.14 which does NOT have pytest. Always use `/usr/bin/python3`.
|
|
44
|
+
|
|
45
|
+
## Architecture Conventions
|
|
46
|
+
|
|
47
|
+
- **All tunable parameters live in `references/heuristics.md`** — not hardcoded in SKILL.md. This includes complexity multipliers, band multipliers, parallel discount factors, cache rate floors, review cycle defaults, decay halflife, per-signature min samples, and midcheck parameters.
|
|
48
|
+
- **Time-decay constants:** `DECAY_HALFLIFE_DAYS = 30` in `update-factors.py` mirrors `decay_halflife_days` in `references/heuristics.md`. `DECAY_MIN_RECORDS = 5` (cold-start guard) is hardcoded in `update-factors.py` and intentionally NOT in heuristics.md — it is a statistical invariant, not user-tunable.
|
|
49
|
+
- **Per-signature factors:** Pass 5 of `update-factors.py` computes per-signature factors from signature-normalized step arrays. Signatures are derived at Pass 1 read time and stored as a private `_canonical_sig` field. In `factors.json`, they live under `signature_factors` and are read with `.get('signature_factors', {})` default for backward compatibility.
|
|
50
|
+
- **Mid-session check:** `tokencast-midcheck.sh` is a PreToolUse hook. It reads `active-estimate.json` and the session JSONL to compute actual spend, then writes state to `calibration/.midcheck-state` (ephemeral, gitignored). The hook is fail-silent (`set -euo pipefail` combined with `|| exit 0`), so failures do not interrupt your work. The state file has two lines: the last-checked byte size and a cooldown sentinel (`0` or `COOLDOWN:<size>`).
|
|
51
|
+
- **Pipeline signature derivation:** Not written to `active-estimate.json`. SKILL.md Step 3e derives it inline from the `steps` array using the same normalization formula as `learn.sh` line 38.
|
|
52
|
+
- **Shell injection safety** — `learn.sh` and `midcheck.sh` pass data to Python via `shlex.quote()` and environment variables. Never interpolate user-derived strings directly into shell commands.
|
|
53
|
+
- **`active-estimate.json` is the handshake** between estimation (SKILL.md writes it at estimate time) and learning (learn.sh reads it at session end). Schema changes must be backward compatible.
|
|
54
|
+
- **Backward compatibility** — new fields in `active-estimate.json` and `factors.json` schemas use `.get()` defaults in Python so old files don't break newer scripts.
|
|
55
|
+
- **File size brackets** — when file paths are extractable from the plan and files exist on disk, tokencast auto-measures via batched `wc -l` (cap: 30 files). Three brackets: small (≤49 lines) = 3k/1k tokens (read/edit), medium (50–500) = 10k/2.5k, large (≥501) = 20k/5k. Fixed-count file reads in all steps use the weighted-average bracket. Override: `avg_file_lines=N`. Unmeasured files fall back to override bracket or medium default.
|
|
56
|
+
- **`file_brackets` in active-estimate.json** — stores aggregate bracket counts (not per-file data) for future calibration stratification. Schema: `{"small": N, "medium": N, "large": N}` or null. `null` means no paths extracted (not the same as `{"small":0,"medium":0,"large":0}` which means paths extracted but none measurable).
|
|
57
|
+
- **Version string must be consistent** across three places: `SKILL.md` frontmatter (`version:`), output template header (`## tokencast estimate (v1.x.x)`), and `learn.sh` `VERSION` variable. Always update all three together.
|
|
58
|
+
- **PR Review Loop calibration** applies the factor independently to each band (not re-anchored as fixed ratios of calibrated Expected) — this preserves the decay model's per-band cycle counts.
|
|
59
|
+
- **Step 3.5 runs post-step-loop** — the PR Review Loop row computation happens after all individual pipeline steps complete Steps 3a–3e, not inline. Cache each constituent step's pre-discount cost during the per-step loop.
|
|
60
|
+
- **Parallel discount does NOT apply to PR Review Loop C value** — `C` uses undiscounted step costs even when constituent steps were modeled as parallel.
|
|
61
|
+
|
|
62
|
+
## Memory / Docs Update Paths
|
|
63
|
+
|
|
64
|
+
When completing work, the `docs-updater` agent should update:
|
|
65
|
+
- `docs/wiki/` — whichever wiki pages cover the changed functionality
|
|
66
|
+
- `MEMORY.md` at `/Users/kellyl./.claude/projects/-Volumes-Macintosh-HD2-Cowork-Projects-costscope/memory/MEMORY.md`
|
|
67
|
+
- `ROADMAP.md` if version or milestone status changed
|
|
68
|
+
|
|
69
|
+
## Project-Specific Estimate Overrides
|
|
70
|
+
|
|
71
|
+
- **`review_cycles=4`** — use this override when running `/tokencast` for tokencast changes. The global `heuristics.md` default of 2 is too low for this project; historical data across 5 sessions has a median of 4 passes and a mean of 5.4 (v1.3: 5, v1.5: 4, v1.6: 3, v1.7+v2.0: 4, v2.1: 11).
|
|
72
|
+
|
|
73
|
+
## Gotchas
|
|
74
|
+
|
|
75
|
+
- **Paths with spaces** — always quote shell paths; use `-print0 | xargs -0` for `find` pipelines. The repo lives at `/Volumes/Macintosh HD2/Cowork/Projects/costscope` — the space in "Macintosh HD2" will break unquoted shell commands.
|
|
76
|
+
- **macOS volume path** — `/Volumes/Macintosh HD2/...` is the working directory; scripts run from there will have the space in the absolute path.
|
|
77
|
+
- **Worktree working directory** — if using git worktrees, the working dir differs from the main repo root. Use absolute paths.
|
|
78
|
+
- **README.md location** — `README.md` is in the repo root (`/Volumes/Macintosh HD2/Cowork/Projects/costscope/README.md`), not inside `.claude/skills/tokencast/`.
|
|
79
|
+
- **`calibration/` is gitignored** — do not commit calibration data. The directory may not exist on a fresh clone; scripts must handle its absence gracefully.
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
<!-- Global pipeline, workflow, agent delegation, and codebase-memory rules are in ~/.claude/CLAUDE.md — loaded automatically every session. No need to duplicate here. -->
|
tokencast-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Kelly Lewis
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tokencast-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokencast
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Pre-execution cost estimation for LLM agent workflows with calibration learning
|
|
5
|
+
Project-URL: Homepage, https://github.com/krulewis/tokencast
|
|
6
|
+
Project-URL: Documentation, https://github.com/krulewis/tokencast/wiki
|
|
7
|
+
Project-URL: Repository, https://github.com/krulewis/tokencast
|
|
8
|
+
Project-URL: Issues, https://github.com/krulewis/tokencast/issues
|
|
9
|
+
Author-email: Kelly Lewis <krulewis@users.noreply.github.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agent,calibration,cost-estimation,llm,mcp,token-cost
|
|
13
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
<p align="center">
|
|
27
|
+
<img src="assets/tokencast-logo.svg" alt="tokencast logo" width="150">
|
|
28
|
+
</p>
|
|
29
|
+
|
|
30
|
+
# tokencast
|
|
31
|
+
|
|
32
|
+
A Claude Code skill that estimates Anthropic API cost for planned agent tasks, then **learns from actual usage** to improve estimates over time.
|
|
33
|
+
|
|
34
|
+
Install once per project. It auto-estimates after plans are created and auto-learns at session end. Zero ongoing friction.
|
|
35
|
+
|
|
36
|
+
## Setup (one time per project)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Clone the repo (anywhere — it doesn't need to live inside your project)
|
|
40
|
+
git clone https://github.com/krulewis/tokencast.git
|
|
41
|
+
|
|
42
|
+
# Install into your project (quote paths with spaces)
|
|
43
|
+
bash tokencast/scripts/install-hooks.sh "/path/to/your-project"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
> **Paths with spaces:** Always wrap the project path in quotes. Without them the install script will fail on paths like `/Volumes/Macintosh HD2/...`.
|
|
47
|
+
|
|
48
|
+
This does three things:
|
|
49
|
+
1. Symlinks the skill into `<project>/.claude/skills/tokencast/`
|
|
50
|
+
2. Adds a `Stop` hook for auto-learning at session end
|
|
51
|
+
3. Adds a `PostToolUse` hook to nudge estimation after planning agents
|
|
52
|
+
|
|
53
|
+
Every Claude Code session in that project now has tokencast active.
|
|
54
|
+
|
|
55
|
+
## What Happens Automatically
|
|
56
|
+
|
|
57
|
+
### After a plan is created
|
|
58
|
+
tokencast detects the plan in conversation context, infers size, files, complexity, project type, and language, then outputs a cost table:
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
## tokencast estimate
|
|
62
|
+
|
|
63
|
+
Change: size=M, files=5, complexity=medium
|
|
64
|
+
Calibration: 1.12x from 8 prior runs
|
|
65
|
+
|
|
66
|
+
| Step | Model | Optimistic | Expected | Pessimistic |
|
|
67
|
+
|-----------------------|--------|------------|----------|-------------|
|
|
68
|
+
| Research Agent | Sonnet | $0.60 | $1.17 | $4.47 |
|
|
69
|
+
| Architect Agent | Opus | $0.67 | $1.18 | $3.97 |
|
|
70
|
+
| ... | ... | ... | ... | ... |
|
|
71
|
+
| TOTAL | | $3.37 | $6.26 | $22.64 |
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### At session end
|
|
75
|
+
The learning hook silently:
|
|
76
|
+
1. Reads the session's JSONL log
|
|
77
|
+
2. Computes actual token cost (including cache write tokens)
|
|
78
|
+
3. Compares to the estimate
|
|
79
|
+
4. Updates calibration factors
|
|
80
|
+
|
|
81
|
+
### Next session
|
|
82
|
+
Future estimates use learned correction factors. More sessions = better accuracy.
|
|
83
|
+
|
|
84
|
+
## Manual Invocation
|
|
85
|
+
|
|
86
|
+
You can also invoke explicitly with overrides:
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
/tokencast size=L files=12 complexity=high
|
|
90
|
+
/tokencast steps=implement,test,qa
|
|
91
|
+
/tokencast review_cycles=3
|
|
92
|
+
/tokencast review_cycles=0
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
Use `review_cycles=N` to set the number of expected PR review cycles. Use `review_cycles=0` to suppress the PR Review Loop row.
|
|
96
|
+
|
|
97
|
+
## How It Works
|
|
98
|
+
|
|
99
|
+
1. Infers size, file count, complexity from the plan in conversation
|
|
100
|
+
2. Reads reference files for pricing and token heuristics
|
|
101
|
+
3. Loads learned calibration factors (if any exist)
|
|
102
|
+
4. Computes per-step token estimates using activity decomposition
|
|
103
|
+
5. Applies complexity multiplier, context accumulation `(K+1)/2`, and cache rates
|
|
104
|
+
6. Splits into Optimistic / Expected / Pessimistic bands
|
|
105
|
+
7. If PR Review Loop is in scope, computes loop cost using geometric decay across N review cycles (Optimistic=1, Expected=N, Pessimistic=N×2)
|
|
106
|
+
8. Applies calibration correction to Expected band (individual steps re-anchor; PR Review Loop scales each band independently)
|
|
107
|
+
9. Records the estimate for later comparison with actuals
|
|
108
|
+
|
|
109
|
+
## Overrides
|
|
110
|
+
|
|
111
|
+
| Override | Effect |
|
|
112
|
+
|----------|--------|
|
|
113
|
+
| `size=M` | Set size class explicitly |
|
|
114
|
+
| `files=5` | Set file count explicitly |
|
|
115
|
+
| `complexity=high` | Set complexity explicitly |
|
|
116
|
+
| `steps=implement,test,qa` | Estimate only those pipeline steps |
|
|
117
|
+
| `project_type=migration` | Set project type explicitly |
|
|
118
|
+
| `language=go` | Set primary language explicitly |
|
|
119
|
+
| `review_cycles=3` | Set PR review cycle count (0 = disable) |
|
|
120
|
+
|
|
121
|
+
## Confidence Bands
|
|
122
|
+
|
|
123
|
+
| Band | Cache Hit | Multiplier | Meaning |
|
|
124
|
+
|-------------|-----------|------------|----------------------------------------|
|
|
125
|
+
| Optimistic | 60% | 0.6x | Best case — focused agent work |
|
|
126
|
+
| Expected | 50% | 1.0x | Typical run |
|
|
127
|
+
| Pessimistic | 30% | 3.0x | With rework loops, debugging, retries |
|
|
128
|
+
|
|
129
|
+
## Calibration
|
|
130
|
+
|
|
131
|
+
Calibration is fully automatic:
|
|
132
|
+
- **0-2 sessions:** No correction applied. "Collecting data" status.
|
|
133
|
+
- **3-10 sessions:** Global correction factor via trimmed mean of actual/expected ratios (trim_fraction=0.1).
|
|
134
|
+
- **10+ sessions:** EWMA with recency weighting. Per-size-class factors activate when a class has 3+ samples.
|
|
135
|
+
- **Outlier filtering:** Sessions with actual/expected ratio >3.0x or <0.2x are excluded from calibration and logged for inspection.
|
|
136
|
+
|
|
137
|
+
Calibration data lives in `calibration/` (gitignored, local to each user).
|
|
138
|
+
|
|
139
|
+
## Disabling
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
bash /path/to/tokencast/scripts/disable.sh "/path/to/your-project"
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Removes the skill and hooks. Preserves calibration data for reuse.
|
|
146
|
+
|
|
147
|
+
## Files
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
SKILL.md — Skill definition (auto-trigger, algorithm)
|
|
151
|
+
references/pricing.md — Model prices, cache rates, step→model map
|
|
152
|
+
references/heuristics.md — Token budgets, pipeline decompositions, multipliers
|
|
153
|
+
references/examples.md — Worked examples with arithmetic
|
|
154
|
+
references/calibration-algorithm.md — Detailed calibration algorithm reference
|
|
155
|
+
commands/
|
|
156
|
+
tokencast-version.md — /tokencast-version slash command
|
|
157
|
+
scripts/
|
|
158
|
+
install-hooks.sh — One-time project setup
|
|
159
|
+
disable.sh — Remove from project
|
|
160
|
+
tokencast-learn.sh — Stop hook: auto-captures actuals
|
|
161
|
+
tokencast-track.sh — PostToolUse hook: nudges estimation after plans
|
|
162
|
+
sum-session-tokens.py — Parses session JSONL for actual costs
|
|
163
|
+
update-factors.py — Computes calibration factors from history
|
|
164
|
+
calibration/ — Per-user local data (gitignored)
|
|
165
|
+
history.jsonl — Estimate vs actual records
|
|
166
|
+
factors.json — Learned correction factors
|
|
167
|
+
active-estimate.json — Transient marker for current estimate
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## v1.1 Changes
|
|
171
|
+
|
|
172
|
+
- **Trimmed mean** replaces median for faster convergence with small samples
|
|
173
|
+
- **Outlier flagging** — extreme ratios (>3.0x or <0.2x) excluded from calibration, logged for inspection
|
|
174
|
+
- **Richer data** — project type, language, pipeline signature, and step count captured per session
|
|
175
|
+
- **Baseline subtraction** — tokens spent before the estimate are excluded from actuals
|
|
176
|
+
- **Security hardening** — path injection fixes, consolidated parsing, safe handling of paths with spaces
|
|
177
|
+
- **Version markers** — `version: 1.1.0` in SKILL.md, `--version` flag on learn script
|
|
178
|
+
|
|
179
|
+
## v1.2 Changes
|
|
180
|
+
|
|
181
|
+
- **PR Review Loop modeling** — geometric-decay cost model for review-fix-re-review cycles
|
|
182
|
+
- **New override** — `review_cycles=N` to set expected cycle count (0 = disable)
|
|
183
|
+
- **Per-band calibration** — PR Review Loop applies calibration independently per band (not re-anchored)
|
|
184
|
+
- **New schema fields** — `review_cycles_estimated` and `review_cycles_actual` in active-estimate.json
|
|
185
|
+
|
|
186
|
+
## Limitations
|
|
187
|
+
|
|
188
|
+
- Pipeline step names reflect a default workflow — map your own steps to the closest defaults. Formulas are pipeline-agnostic (see `references/heuristics.md`)
|
|
189
|
+
- Heuristics assume typical 150-300 line source files
|
|
190
|
+
- Does not model parallel agent execution
|
|
191
|
+
- Calibration requires 3+ completed sessions before corrections activate
|
|
192
|
+
- Pricing data embedded; check `last_updated` in references/pricing.md
|
|
193
|
+
- Multi-session tasks only capture the session containing the estimate
|
|
194
|
+
|
|
195
|
+
## License
|
|
196
|
+
|
|
197
|
+
MIT
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/tokencast-logo.svg" alt="tokencast logo" width="150">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# tokencast
|
|
6
|
+
|
|
7
|
+
A Claude Code skill that estimates Anthropic API cost for planned agent tasks, then **learns from actual usage** to improve estimates over time.
|
|
8
|
+
|
|
9
|
+
Install once per project. It auto-estimates after plans are created and auto-learns at session end. Zero ongoing friction.
|
|
10
|
+
|
|
11
|
+
## Setup (one time per project)
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Clone the repo (anywhere — it doesn't need to live inside your project)
|
|
15
|
+
git clone https://github.com/krulewis/tokencast.git
|
|
16
|
+
|
|
17
|
+
# Install into your project (quote paths with spaces)
|
|
18
|
+
bash tokencast/scripts/install-hooks.sh "/path/to/your-project"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
> **Paths with spaces:** Always wrap the project path in quotes. Without them the install script will fail on paths like `/Volumes/Macintosh HD2/...`.
|
|
22
|
+
|
|
23
|
+
This does three things:
|
|
24
|
+
1. Symlinks the skill into `<project>/.claude/skills/tokencast/`
|
|
25
|
+
2. Adds a `Stop` hook for auto-learning at session end
|
|
26
|
+
3. Adds a `PostToolUse` hook to nudge estimation after planning agents
|
|
27
|
+
|
|
28
|
+
Every Claude Code session in that project now has tokencast active.
|
|
29
|
+
|
|
30
|
+
## What Happens Automatically
|
|
31
|
+
|
|
32
|
+
### After a plan is created
|
|
33
|
+
tokencast detects the plan in conversation context, infers size, files, complexity, project type, and language, then outputs a cost table:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
## tokencast estimate
|
|
37
|
+
|
|
38
|
+
Change: size=M, files=5, complexity=medium
|
|
39
|
+
Calibration: 1.12x from 8 prior runs
|
|
40
|
+
|
|
41
|
+
| Step | Model | Optimistic | Expected | Pessimistic |
|
|
42
|
+
|-----------------------|--------|------------|----------|-------------|
|
|
43
|
+
| Research Agent | Sonnet | $0.60 | $1.17 | $4.47 |
|
|
44
|
+
| Architect Agent | Opus | $0.67 | $1.18 | $3.97 |
|
|
45
|
+
| ... | ... | ... | ... | ... |
|
|
46
|
+
| TOTAL | | $3.37 | $6.26 | $22.64 |
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### At session end
|
|
50
|
+
The learning hook silently:
|
|
51
|
+
1. Reads the session's JSONL log
|
|
52
|
+
2. Computes actual token cost (including cache write tokens)
|
|
53
|
+
3. Compares to the estimate
|
|
54
|
+
4. Updates calibration factors
|
|
55
|
+
|
|
56
|
+
### Next session
|
|
57
|
+
Future estimates use learned correction factors. More sessions = better accuracy.
|
|
58
|
+
|
|
59
|
+
## Manual Invocation
|
|
60
|
+
|
|
61
|
+
You can also invoke explicitly with overrides:
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
/tokencast size=L files=12 complexity=high
|
|
65
|
+
/tokencast steps=implement,test,qa
|
|
66
|
+
/tokencast review_cycles=3
|
|
67
|
+
/tokencast review_cycles=0
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Use `review_cycles=N` to set the number of expected PR review cycles. Use `review_cycles=0` to suppress the PR Review Loop row.
|
|
71
|
+
|
|
72
|
+
## How It Works
|
|
73
|
+
|
|
74
|
+
1. Infers size, file count, and complexity from the plan in the conversation
|
|
75
|
+
2. Reads reference files for pricing and token heuristics
|
|
76
|
+
3. Loads learned calibration factors (if any exist)
|
|
77
|
+
4. Computes per-step token estimates using activity decomposition
|
|
78
|
+
5. Applies complexity multiplier, context accumulation `(K+1)/2`, and cache rates
|
|
79
|
+
6. Splits into Optimistic / Expected / Pessimistic bands
|
|
80
|
+
7. If PR Review Loop is in scope, computes loop cost using geometric decay across N review cycles (Optimistic=1, Expected=N, Pessimistic=N×2)
|
|
81
|
+
8. Applies calibration correction to Expected band (individual steps re-anchor; PR Review Loop scales each band independently)
|
|
82
|
+
9. Records the estimate for later comparison with actuals
|
|
83
|
+
|
|
84
|
+
## Overrides
|
|
85
|
+
|
|
86
|
+
| Override | Effect |
|
|
87
|
+
|----------|--------|
|
|
88
|
+
| `size=M` | Set size class explicitly |
|
|
89
|
+
| `files=5` | Set file count explicitly |
|
|
90
|
+
| `complexity=high` | Set complexity explicitly |
|
|
91
|
+
| `steps=implement,test,qa` | Estimate only those pipeline steps |
|
|
92
|
+
| `project_type=migration` | Set project type explicitly |
|
|
93
|
+
| `language=go` | Set primary language explicitly |
|
|
94
|
+
| `review_cycles=3` | Set PR review cycle count (0 = disable) |
|
|
95
|
+
|
|
96
|
+
## Confidence Bands
|
|
97
|
+
|
|
98
|
+
| Band | Cache Hit | Multiplier | Meaning |
|
|
99
|
+
|-------------|-----------|------------|----------------------------------------|
|
|
100
|
+
| Optimistic | 60% | 0.6x | Best case — focused agent work |
|
|
101
|
+
| Expected | 50% | 1.0x | Typical run |
|
|
102
|
+
| Pessimistic | 30% | 3.0x | With rework loops, debugging, retries |
|
|
103
|
+
|
|
104
|
+
## Calibration
|
|
105
|
+
|
|
106
|
+
Calibration is fully automatic:
|
|
107
|
+
- **0-2 sessions:** No correction applied. "Collecting data" status.
|
|
108
|
+
- **3-10 sessions:** Global correction factor via trimmed mean of actual/expected ratios (trim_fraction=0.1).
|
|
109
|
+
- **10+ sessions:** EWMA with recency weighting. Per-size-class factors activate when a class has 3+ samples.
|
|
110
|
+
- **Outlier filtering:** Sessions with actual/expected ratio >3.0x or <0.2x are excluded from calibration and logged for inspection.
|
|
111
|
+
|
|
112
|
+
Calibration data lives in `calibration/` (gitignored, local to each user).
|
|
113
|
+
|
|
114
|
+
## Disabling
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
bash /path/to/tokencast/scripts/disable.sh "/path/to/your-project"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Removes the skill and hooks. Preserves calibration data for reuse.
|
|
121
|
+
|
|
122
|
+
## Files
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
SKILL.md — Skill definition (auto-trigger, algorithm)
|
|
126
|
+
references/pricing.md — Model prices, cache rates, step→model map
|
|
127
|
+
references/heuristics.md — Token budgets, pipeline decompositions, multipliers
|
|
128
|
+
references/examples.md — Worked examples with arithmetic
|
|
129
|
+
references/calibration-algorithm.md — Detailed calibration algorithm reference
|
|
130
|
+
commands/
|
|
131
|
+
tokencast-version.md — /tokencast-version slash command
|
|
132
|
+
scripts/
|
|
133
|
+
install-hooks.sh — One-time project setup
|
|
134
|
+
disable.sh — Remove from project
|
|
135
|
+
tokencast-learn.sh — Stop hook: auto-captures actuals
|
|
136
|
+
tokencast-track.sh — PostToolUse hook: nudges estimation after plans
|
|
137
|
+
sum-session-tokens.py — Parses session JSONL for actual costs
|
|
138
|
+
update-factors.py — Computes calibration factors from history
|
|
139
|
+
calibration/ — Per-user local data (gitignored)
|
|
140
|
+
history.jsonl — Estimate vs actual records
|
|
141
|
+
factors.json — Learned correction factors
|
|
142
|
+
active-estimate.json — Transient marker for current estimate
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## v1.1 Changes
|
|
146
|
+
|
|
147
|
+
- **Trimmed mean** replaces median for faster convergence with small samples
|
|
148
|
+
- **Outlier flagging** — extreme ratios (>3.0x or <0.2x) excluded from calibration, logged for inspection
|
|
149
|
+
- **Richer data** — project type, language, pipeline signature, and step count captured per session
|
|
150
|
+
- **Baseline subtraction** — tokens spent before the estimate are excluded from actuals
|
|
151
|
+
- **Security hardening** — path injection fixes, consolidated parsing, safe handling of paths with spaces
|
|
152
|
+
- **Version markers** — `version: 1.1.0` in SKILL.md, `--version` flag on learn script
|
|
153
|
+
|
|
154
|
+
## v1.2 Changes
|
|
155
|
+
|
|
156
|
+
- **PR Review Loop modeling** — geometric-decay cost model for review-fix-re-review cycles
|
|
157
|
+
- **New override** — `review_cycles=N` to set expected cycle count (0 = disable)
|
|
158
|
+
- **Per-band calibration** — PR Review Loop applies calibration independently per band (not re-anchored)
|
|
159
|
+
- **New schema fields** — `review_cycles_estimated` and `review_cycles_actual` in active-estimate.json
|
|
160
|
+
|
|
161
|
+
## Limitations
|
|
162
|
+
|
|
163
|
+
- Pipeline step names reflect a default workflow — map your own steps to the closest defaults. Formulas are pipeline-agnostic (see `references/heuristics.md`)
|
|
164
|
+
- Heuristics assume typical 150-300 line source files
|
|
165
|
+
- Does not model parallel agent execution
|
|
166
|
+
- Calibration requires 3+ completed sessions before corrections activate
|
|
167
|
+
- Pricing data is embedded; check `last_updated` in `references/pricing.md`
|
|
168
|
+
- For multi-session tasks, only the session containing the estimate is captured
|
|
169
|
+
|
|
170
|
+
## License
|
|
171
|
+
|
|
172
|
+
MIT
|