token-goat 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. token_goat-0.3.1/.gitattributes +1 -0
  2. token_goat-0.3.1/.github/workflows/ci.yml +24 -0
  3. token_goat-0.3.1/.github/workflows/publish.yml +53 -0
  4. token_goat-0.3.1/.gitignore +148 -0
  5. token_goat-0.3.1/CHANGELOG.md +155 -0
  6. token_goat-0.3.1/LICENSE +145 -0
  7. token_goat-0.3.1/PKG-INFO +255 -0
  8. token_goat-0.3.1/README.md +210 -0
  9. token_goat-0.3.1/SECURITY.md +44 -0
  10. token_goat-0.3.1/assets/icon.png +0 -0
  11. token_goat-0.3.1/assets/logo.png +0 -0
  12. token_goat-0.3.1/assets/stats.png +0 -0
  13. token_goat-0.3.1/assets/stats_social.png +0 -0
  14. token_goat-0.3.1/docs/architecture.md +5 -0
  15. token_goat-0.3.1/lefthook.yml +10 -0
  16. token_goat-0.3.1/pyproject.toml +121 -0
  17. token_goat-0.3.1/src/token_goat/__init__.py +2 -0
  18. token_goat-0.3.1/src/token_goat/__main__.py +5 -0
  19. token_goat-0.3.1/src/token_goat/bash_parser.py +158 -0
  20. token_goat-0.3.1/src/token_goat/cli.py +977 -0
  21. token_goat-0.3.1/src/token_goat/cli_doctor.py +364 -0
  22. token_goat-0.3.1/src/token_goat/cli_stats.py +56 -0
  23. token_goat-0.3.1/src/token_goat/compact.py +127 -0
  24. token_goat-0.3.1/src/token_goat/config.py +206 -0
  25. token_goat-0.3.1/src/token_goat/db.py +776 -0
  26. token_goat-0.3.1/src/token_goat/embeddings.py +484 -0
  27. token_goat-0.3.1/src/token_goat/gdrive.py +192 -0
  28. token_goat-0.3.1/src/token_goat/hints.py +289 -0
  29. token_goat-0.3.1/src/token_goat/hooks_cli.py +324 -0
  30. token_goat-0.3.1/src/token_goat/hooks_common.py +82 -0
  31. token_goat-0.3.1/src/token_goat/hooks_edit.py +91 -0
  32. token_goat-0.3.1/src/token_goat/hooks_fetch.py +93 -0
  33. token_goat-0.3.1/src/token_goat/hooks_read.py +189 -0
  34. token_goat-0.3.1/src/token_goat/hooks_session.py +79 -0
  35. token_goat-0.3.1/src/token_goat/image_shrink.py +261 -0
  36. token_goat-0.3.1/src/token_goat/install.py +1353 -0
  37. token_goat-0.3.1/src/token_goat/languages/__init__.py +1 -0
  38. token_goat-0.3.1/src/token_goat/languages/common.py +339 -0
  39. token_goat-0.3.1/src/token_goat/languages/go.py +156 -0
  40. token_goat-0.3.1/src/token_goat/languages/html.py +88 -0
  41. token_goat-0.3.1/src/token_goat/languages/json_idx.py +74 -0
  42. token_goat-0.3.1/src/token_goat/languages/liquid.py +91 -0
  43. token_goat-0.3.1/src/token_goat/languages/markdown.py +58 -0
  44. token_goat-0.3.1/src/token_goat/languages/python.py +92 -0
  45. token_goat-0.3.1/src/token_goat/languages/rust.py +136 -0
  46. token_goat-0.3.1/src/token_goat/languages/typescript.py +238 -0
  47. token_goat-0.3.1/src/token_goat/parser.py +578 -0
  48. token_goat-0.3.1/src/token_goat/paths.py +221 -0
  49. token_goat-0.3.1/src/token_goat/project.py +103 -0
  50. token_goat-0.3.1/src/token_goat/read_commands.py +459 -0
  51. token_goat-0.3.1/src/token_goat/read_replacement.py +496 -0
  52. token_goat-0.3.1/src/token_goat/render/__init__.py +1 -0
  53. token_goat-0.3.1/src/token_goat/render/ansi.py +70 -0
  54. token_goat-0.3.1/src/token_goat/render/ansi.ts +66 -0
  55. token_goat-0.3.1/src/token_goat/render/stats-renderer.ts +513 -0
  56. token_goat-0.3.1/src/token_goat/render/stats_messages.json +9 -0
  57. token_goat-0.3.1/src/token_goat/render/stats_renderer.py +609 -0
  58. token_goat-0.3.1/src/token_goat/render/types.py +68 -0
  59. token_goat-0.3.1/src/token_goat/render/types.ts +64 -0
  60. token_goat-0.3.1/src/token_goat/repomap.py +472 -0
  61. token_goat-0.3.1/src/token_goat/session.py +433 -0
  62. token_goat-0.3.1/src/token_goat/stats.py +681 -0
  63. token_goat-0.3.1/src/token_goat/webfetch.py +305 -0
  64. token_goat-0.3.1/src/token_goat/worker.py +1048 -0
  65. token_goat-0.3.1/src/token_goat/worker_daemon.py +145 -0
  66. token_goat-0.3.1/tests/__init__.py +1 -0
  67. token_goat-0.3.1/tests/conftest.py +297 -0
  68. token_goat-0.3.1/tests/fixtures/.gitkeep +0 -0
  69. token_goat-0.3.1/tests/fixtures/go_sample/main.go +34 -0
  70. token_goat-0.3.1/tests/fixtures/html_sample/article.html +18 -0
  71. token_goat-0.3.1/tests/fixtures/json_sample/config.json +13 -0
  72. token_goat-0.3.1/tests/fixtures/liquid_sample/sections/header.liquid +14 -0
  73. token_goat-0.3.1/tests/fixtures/liquid_sample/snippets/social-icons.liquid +1 -0
  74. token_goat-0.3.1/tests/fixtures/md_sample/article.md +20 -0
  75. token_goat-0.3.1/tests/fixtures/py_sample/app.py +17 -0
  76. token_goat-0.3.1/tests/fixtures/rust_sample/Cargo.toml +3 -0
  77. token_goat-0.3.1/tests/fixtures/rust_sample/src/main.rs +34 -0
  78. token_goat-0.3.1/tests/fixtures/ts_sample/index.ts +23 -0
  79. token_goat-0.3.1/tests/test_bash_parser.py +255 -0
  80. token_goat-0.3.1/tests/test_cli_hook_smoke.py +46 -0
  81. token_goat-0.3.1/tests/test_cli_smoke.py +32 -0
  82. token_goat-0.3.1/tests/test_codex_harness.py +200 -0
  83. token_goat-0.3.1/tests/test_compact.py +344 -0
  84. token_goat-0.3.1/tests/test_config_cli.py +215 -0
  85. token_goat-0.3.1/tests/test_coverage_gaps.py +499 -0
  86. token_goat-0.3.1/tests/test_db.py +420 -0
  87. token_goat-0.3.1/tests/test_doctor.py +257 -0
  88. token_goat-0.3.1/tests/test_e2e_smoke.py +173 -0
  89. token_goat-0.3.1/tests/test_edge_cases_coverage.py +218 -0
  90. token_goat-0.3.1/tests/test_embeddings.py +308 -0
  91. token_goat-0.3.1/tests/test_error_helpers.py +188 -0
  92. token_goat-0.3.1/tests/test_gdrive.py +328 -0
  93. token_goat-0.3.1/tests/test_hints.py +648 -0
  94. token_goat-0.3.1/tests/test_hooks_dispatcher.py +459 -0
  95. token_goat-0.3.1/tests/test_hooks_edit.py +145 -0
  96. token_goat-0.3.1/tests/test_hooks_image.py +172 -0
  97. token_goat-0.3.1/tests/test_hooks_pre_fetch.py +257 -0
  98. token_goat-0.3.1/tests/test_hooks_pre_read.py +296 -0
  99. token_goat-0.3.1/tests/test_hooks_session.py +270 -0
  100. token_goat-0.3.1/tests/test_hooks_webfetch.py +138 -0
  101. token_goat-0.3.1/tests/test_image_shrink.py +360 -0
  102. token_goat-0.3.1/tests/test_index_go_rust.py +144 -0
  103. token_goat-0.3.1/tests/test_index_pipeline.py +378 -0
  104. token_goat-0.3.1/tests/test_index_root.py +268 -0
  105. token_goat-0.3.1/tests/test_install.py +860 -0
  106. token_goat-0.3.1/tests/test_install_codex.py +307 -0
  107. token_goat-0.3.1/tests/test_optional_dependencies.py +52 -0
  108. token_goat-0.3.1/tests/test_parser_abi.py +165 -0
  109. token_goat-0.3.1/tests/test_parser_go.py +160 -0
  110. token_goat-0.3.1/tests/test_parser_html.py +85 -0
  111. token_goat-0.3.1/tests/test_parser_json.py +66 -0
  112. token_goat-0.3.1/tests/test_parser_liquid.py +79 -0
  113. token_goat-0.3.1/tests/test_parser_malformed.py +287 -0
  114. token_goat-0.3.1/tests/test_parser_md.py +75 -0
  115. token_goat-0.3.1/tests/test_parser_py.py +133 -0
  116. token_goat-0.3.1/tests/test_parser_rust.py +131 -0
  117. token_goat-0.3.1/tests/test_parser_ts.py +153 -0
  118. token_goat-0.3.1/tests/test_paths.py +303 -0
  119. token_goat-0.3.1/tests/test_project.py +91 -0
  120. token_goat-0.3.1/tests/test_read_replacement.py +1308 -0
  121. token_goat-0.3.1/tests/test_repomap.py +397 -0
  122. token_goat-0.3.1/tests/test_security_validation.py +227 -0
  123. token_goat-0.3.1/tests/test_session.py +395 -0
  124. token_goat-0.3.1/tests/test_stats.py +744 -0
  125. token_goat-0.3.1/tests/test_symbol_cli.py +278 -0
  126. token_goat-0.3.1/tests/test_webfetch.py +425 -0
  127. token_goat-0.3.1/tests/test_worker.py +1386 -0
  128. token_goat-0.3.1/tests/test_worker_daemon.py +267 -0
  129. token_goat-0.3.1/uv.lock +1792 -0
@@ -0,0 +1 @@
1
+ * text=auto
@@ -0,0 +1,24 @@
1
+ name: CI
2
+ on: [push, pull_request]
3
+ jobs:
4
+ lint:
5
+ runs-on: windows-2022
6
+ steps:
7
+ - uses: actions/checkout@v4
8
+ - uses: astral-sh/setup-uv@v3
9
+ - run: uv python install 3.13
10
+ - run: uv sync --all-extras
11
+ - name: Lint
12
+ run: uv run ruff check
13
+ - name: Type check
14
+ run: uv run mypy src
15
+
16
+ test:
17
+ runs-on: windows-2022
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - uses: astral-sh/setup-uv@v3
21
+ - run: uv python install 3.13
22
+ - run: uv sync --all-extras
23
+ - name: Test
24
+ run: uv run pytest
@@ -0,0 +1,53 @@
1
+ name: Publish to PyPI
2
+
3
+ # Publishes the package to PyPI on every GitHub Release using OIDC-based
4
+ # Trusted Publishing — no long-lived API tokens are stored as repo secrets.
5
+ #
6
+ # PyPI side: configure a Trusted Publisher at
7
+ # https://pypi.org/manage/project/token-goat/settings/publishing/ with
8
+ # Owner: DFKHelper
9
+ # Repository: token-goat
10
+ # Workflow: publish.yml
11
+ # Environment: pypi
12
+ # The first publish must be done manually (or via PyPI's "Pending publisher"
13
+ # flow) so the project exists; subsequent releases flow through here.
14
+
15
+ on:
16
+ release:
17
+ types: [published]
18
+ workflow_dispatch:
19
+
20
+ jobs:
21
+ build:
22
+ name: Build sdist + wheel
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: astral-sh/setup-uv@v3
27
+ - run: uv python install 3.13
28
+ - name: Build
29
+ run: uv build
30
+ - name: Upload artifacts
31
+ uses: actions/upload-artifact@v4
32
+ with:
33
+ name: dist
34
+ path: dist/
35
+
36
+ publish:
37
+ name: Publish to PyPI (OIDC)
38
+ needs: build
39
+ runs-on: ubuntu-latest
40
+ environment:
41
+ name: pypi
42
+ url: https://pypi.org/p/token-goat
43
+ permissions:
44
+ id-token: write
45
+ contents: read
46
+ steps:
47
+ - name: Download artifacts
48
+ uses: actions/download-artifact@v4
49
+ with:
50
+ name: dist
51
+ path: dist/
52
+ - name: Publish
53
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,148 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ *.py,cover
45
+ .hypothesis/
46
+ .pytest_cache/
47
+
48
+ # Translations
49
+ *.mo
50
+ *.pot
51
+
52
+ # Django stuff:
53
+ *.log
54
+ local_settings.py
55
+ db.sqlite3
56
+ db.sqlite3-journal
57
+
58
+ # Flask stuff:
59
+ instance/
60
+ .webassets-cache
61
+
62
+ # Scrapy stuff:
63
+ .scrapy
64
+
65
+ # Sphinx documentation
66
+ docs/_build/
67
+
68
+ # PyBuilder
69
+ target/
70
+
71
+ # Jupyter Notebook
72
+ .ipynb_checkpoints
73
+
74
+ # IPython
75
+ profile_default/
76
+ ipython_config.py
77
+
78
+ # pyenv
79
+ .python-version
80
+
81
+ # pipenv
82
+ Pipfile.lock
83
+
84
+ # PEP 582
85
+ __pypackages__/
86
+
87
+ # Celery stuff
88
+ celerybeat-schedule
89
+ celerybeat.pid
90
+
91
+ # SageMath parsed files
92
+ *.sage.py
93
+
94
+ # Environments
95
+ .env
96
+ .venv
97
+ env/
98
+ venv/
99
+ ENV/
100
+ env.bak/
101
+ venv.bak/
102
+
103
+ # Spyder project settings
104
+ .spyderproject
105
+ .spyproject
106
+
107
+ # Rope project settings
108
+ .ropeproject
109
+
110
+ # mkdocs documentation
111
+ /site
112
+
113
+ # mypy
114
+ .mypy_cache/
115
+ .dmypy.json
116
+ dmypy.json
117
+
118
+ # Pyre type checker
119
+ .pyre/
120
+
121
+ # cc-saver runtime
122
+ *.db
123
+ *.db-wal
124
+ *.db-shm
125
+ .cc-shrunken.*
126
+ .improve-state-*.json
127
+ tmp_data/
128
+
129
+ # Patent filings — personal, not for public repo
130
+ PATENT_PROVISIONAL_DRAFT.md
131
+ PATENT_PROVISIONAL_DRAFT.pdf
132
+ convert_patent_pdf.py
133
+
134
+ *patent*.pdf
135
+ *patent*.md
136
+ patantsub.pdf
137
+
138
+ # IDE
139
+ .idea/
140
+ .vscode/
141
+ *.iml
142
+
143
+ # Claude Code project instructions — kept local, not for public repo
144
+ CLAUDE.md
145
+
146
+ # Test/temp artifacts
147
+ .tmp/
148
+ .verify-manual/
@@ -0,0 +1,155 @@
1
+ # Changelog
2
+
3
+ All notable changes to Token-Goat are documented in this file. Format follows Keep a Changelog. Token-Goat follows Semantic Versioning starting at 1.0.
4
+
5
+ ## [Unreleased]
6
+
7
+ ## [0.3.1] - 2026-05-16
8
+
9
+ ### Added
10
+
11
+ - **Linux and WSL support.** The worker now registers as a `systemd --user` service (`~/.config/systemd/user/token-goat-worker.service`) when systemd is available, with an XDG autostart `.desktop` fallback elsewhere. On WSL without systemd, the SessionStart hook starts the worker at the beginning of every Claude Code session. Data directory: `~/.local/share/token-goat/`. The install/uninstall flow, doctor checks, weekly auto-update (via `crontab`), and hook entry-point are platform-aware end-to-end.
12
+ - **macOS support** (untested). The worker registers as a LaunchAgent at `~/Library/LaunchAgents/com.dfkhelper.token-goat-worker.plist`, loaded via `launchctl`. Data directory: `~/Library/Application Support/dfk-helper/token-goat/`. Weekly auto-update uses the same crontab path as Linux.
13
+ - **PyPI Trusted Publishing.** A `Publish to PyPI` GitHub Actions workflow builds and publishes on GitHub Release via OIDC, replacing long-lived API tokens stored as repo secrets. PyPI's docs explicitly call out the security and usability advantages of OIDC-based publishing.
14
+ - **README `What gets installed?` and `Security, privacy, and uninstall` sections** enumerating every file, hook, autostart entry, scheduled task, and data path the installer writes — and how each is reversed.
15
+ - README badges for PyPI version and CI status (in addition to the existing Python version and license badges).
16
+ - Lefthook git hooks for local lint / type-check / test parity with CI.
17
+ - PyPI project URLs, classifiers, and keywords surfaced in `pyproject.toml`.
18
+
19
+ ### Changed
20
+
21
+ - Data directory namespace renamed from `DFK Helper LLC` to `dfk-helper` for cross-platform path hygiene (matches the platformdirs convention on every OS). A reinstall will recreate the index at the new path; the old directory can be removed by hand.
22
+ - Author / namespace migrated to `DFK Helper LLC` across the project (replaces a personal username in metadata and packaging fields).
23
+ - CI slimmed to Python 3.13 on Windows for `ruff`, `mypy`, and `pytest`. The package itself still declares support for 3.11–3.13.
24
+ - README rewritten with a before/after comparison table and stat callouts.
25
+
26
+ ### Fixed
27
+
28
+ - Python 3.13 changed how `stat()` reports paths that contain a null byte; existing tests and a defensive check in `paths.py` were updated to accommodate the new error type.
29
+ - Three Windows-runner CI test failures resolved.
30
+ - Ruff caught a handful of orphaned imports left over from the iteration sweeps — all removed.
31
+ - `token-goat stats` no longer charges suggestion-only hints with an overhead "saving" they did not earn.
32
+ - `token-goat stats` bar-scale and share-% now use separate denominators so a single dominant kind no longer flattens the rest of the chart.
33
+
34
+ ### Security
35
+
36
+ - Continued hardening of input validation in `paths.py` (`is_safe_rel_path`, hash-traversal guards in `project_db_path` and `session_cache_path`) so no rel-path can escape the data directory under any caller.
37
+
38
+ ### Removed
39
+
40
+ - Legacy `tokenwise` launcher binaries (`tokenwise`, `tokenwise-hook`, `tokenwise-worker`) are now removed during install and uninstall when they sit alongside the current `token-goat` launchers.
41
+ - Provisional application number stripped from the patent notice.
42
+
43
+ ## [0.2.3] - 2026-05-14
44
+
45
+ ### Changed
46
+
47
+ - **`token-goat stats` reorders its table columns.** In the by-kind, by-day and by-project tables the `share` percentage now sits directly after `tokens saved`, ahead of the raw `events` count. The share is the at-a-glance "how much of the total is this" number; the event count is supporting detail — so the eye lands on share first and the column order matches that priority.
48
+ - **The worker now restarts on a same-version reinstall.** Its version-self-restart compared only the installed version *string*, so `uv tool install --reinstall` without a version bump — the common case during development — left the worker running stale code until something restarted it manually. `run_daemon` now also compares a content fingerprint of the installed package (a hash over the size and mtime of every `.py` file in the package directory), captured at boot and re-read on the same once-a-minute cadence. A change in either the version string or the fingerprint triggers the graceful slot-release-and-respawn. Fails soft: a fingerprint that can't be computed falls back to the version-string check.
49
+ - **Daily log files are now size-capped.** The `worker.log` and hook daily logs used a plain `FileHandler` with no size bound — they were bounded in *count* (date-named, 7-day retention sweep) but a single pathological day, e.g. a worker stuck in a fast error loop, could still bloat one file. Both handlers, and the `worker-stderr.log` crash sink, now share `paths.roll_log_if_oversized()`, which rolls a log over to a `.prev.log` sibling once it passes its cap (5 MB for daily logs, 1 MB for the crash sink) before the handler is attached. Best-effort under Windows multi-process contention — the roll is suppressed if another process holds the file and retried by the next opener — and `.prev.log` ends in `.log` so the retention sweep still reaps it.
50
+
51
+ ## [0.2.2] - 2026-05-14
52
+
53
+ ### Added
54
+
55
+ - **Skills and plugins indexing.** `token-goat index --root <path>` indexes any directory — no `.git` or project marker required. Shorthand flags: `--skills` indexes `~/.claude/skills/`, `--plugins` indexes `~/.claude/plugins/`. After indexing, `token-goat section "superman/SKILL.md::Plan Gate"` and `token-goat read "ralph/SKILL.md::symbol"` work from any directory, and `token-goat symbol --all-projects` picks up symbols defined in skills. Run once and forget — incremental re-indexing keeps skills current as you update them.
56
+ - **Cross-project file resolution.** `token-goat section` and `token-goat read` now fall back to searching all indexed projects when the file is not found in the current project. This means `token-goat section "superman/SKILL.md::Plan Gate"` works from inside any project directory, not just from inside `~/.claude/skills/`.
57
+
58
+ - **Compaction assist.** Before Claude Code compacts the conversation, a new `PreCompact` hook builds a structured session manifest and injects it as `systemMessage` so the compaction LLM can preserve edited files, accessed symbols, and frequently read files in its summary. The manifest stays under a configurable token budget (default 400 tokens). Configure via `[compact_assist]` in `config.toml` or set `TOKEN_GOAT_COMPACT_ASSIST=0` to disable entirely.
59
+ - `token-goat compact-hint --session-id <id>` debug command shows exactly what the `PreCompact` hook would emit for any session.
60
+ - `session.py` now tracks which files were edited this session (`edited_files: dict[str, int]`). The `post_edit` hook (previously a no-op) now calls `session.mark_file_edited()` on every Write/Edit/MultiEdit. Edited files are listed first in the compaction manifest — they are the most critical context to preserve.
61
+ - `token-goat doctor` now reports worker-watchdog state: the single-worker claim file (held / stale / absent), any index-spawn markers (`locks/{hash}.indexing`) and whether they are active or stale, and the dirty-queue depth (flagged when a backlog suggests the worker is down or behind). These cover the failure modes introduced with the worker claim file and index-spawn deduplication.
62
+ - `token-goat doctor --fix` clears the stale `.indexing` spawn markers doctor flags — the on-demand counterpart to the worker's startup reaping, for when the worker is down. It only ever removes markers `spawn_index_detached` already reads as inactive, so an in-flight indexer is never disturbed.
63
+
64
+ ### Changed
65
+
66
+ - `token-goat stats` now reports the **net** token impact of the pre-read hook, not just its upside. Injecting a hint as `additionalContext` costs tokens in the conversation; the `session_hint` event now records `realized_saving − injection_cost`. Dedup hints (re-read warnings) stay net-positive; pure suggestion hints record a small negative — the honest signal that they cost tokens now and pay off later via the `read_replacement` stat that `token-goat read` records if the agent acts on them. Summing all `session_hint` events answers "is the pre-read hook net-positive?" directly.
67
+ - Pre-read hints are leaner. The purely-informational "FYI, you read this file earlier, proceeding" note — emitted on a non-overlapping re-read — is suppressed entirely: it carried nothing actionable and only cost tokens. The "large file, use `token-goat read`" suggestion no longer enumerates every indexed symbol; it carries one example command and lets `token-goat symbol`/`map` provide the full list on demand.
68
+
69
+ - Incremental indexing is now O(N × stat) instead of O(N × file-read + SHA) for unchanged projects. The previous path called `index_file()` — reading file bytes and computing SHA256 — for every file in the project just to determine nothing had changed. The incremental path now loads `(rel_path, mtime, content_sha256)` from the DB, checks `stat().st_mtime` first, and skips `index_file()` entirely when mtime is unchanged. The SHA check is preserved as a secondary guard for same-mtime content changes (e.g., `touch` + overwrite). This makes the 10-minute worker sweeps over skills and plugins near-instant when nothing has changed.
70
+
71
+ - `token-goat stats` startup time reduced from ~10 s to ~2 s. Root cause was N `PRAGMA integrity_check` + N DDL `executescript` calls per registered project on every invocation. `stats.py` now uses new read-only DB openers (`db.open_global_readonly()` / `db.open_project_readonly()`) that open SQLite with `?mode=ro` URI flag, skipping integrity checks, DDL, WAL activation, and sqlite-vec loading.
72
+ - `token-goat stats` bar widths and share percentages now reflect token savings rather than bytes saved. Event kinds that cannot produce a token estimate (webfetch and Drive image downloads, which report raw bytes with no token equivalent) fall back to bytes for their bar, with visual distinction.
73
+ - `image_shrink` events now correctly show token savings in `token-goat stats`. The tokens column was hardcoded to `—` despite the data being present in the DB.
74
+ - The worker's periodic reindex now sweeps every recently-active project, not just `marker='manual'` skills and plugins. Previously, normal git projects only reindexed when a file was edited *through Claude Code* (via the `post_edit` hook → dirty queue); a file edited in an IDE or by another tool would never be picked up, so `token-goat read`/`symbol`/`map` returned stale results indefinitely. The sweep is bounded to projects seen within the last 7 days, and `last_seen` is now bumped by the `SessionStart` hook so the window tracks real usage rather than the worker's own reindex cadence.
75
+
76
+ ### Fixed
77
+
78
+ - **The worker-stderr crash sink grew without bound.** `spawn_detached` opens `logs/worker-stderr.log` in append mode on every worker spawn (one per `SessionStart` hook), and the daily-log retention sweep never catches it — each append refreshes the file's mtime, so it never ages past the 7-day cutoff. An actively-written crash log therefore grew forever. `spawn_detached` now rolls the file over to `worker-stderr.prev.log` once it passes `STDERR_LOG_MAX_BYTES` (1 MB), bounding the crash sink at ~2 MB while still retaining recent crash output.
79
+ - **Edits made while a project was first being indexed were silently dropped.** `index_project` registered the project in the global `projects` table only *after* the full file walk and index completed. For a large tree that window is minutes long — and never closes if the index spawn hangs or crashes. During it, the worker's dirty-queue drain looked up the project hash, found nothing, logged `dirty queue refers to unknown project hash`, and discarded the entry — so any file edited mid-index was never reindexed. The project is now registered in the global registry up front, before the walk; the final registry update still fills in the real `file_count`/`languages` once indexing finishes, and a crashed initial index now self-heals via the normal incremental drain and periodic reindex. (Surfaced in the field by a stray `.git` at a directory that is a container of repos, which made the entire supertree index as one project.)
80
+ - **The test suite deleted the user's real worker-autostart Run key.** `test_install_uninstall_round_trip` exercises `install_all()`/`uninstall_all()` — which call `winreg.SetValueEx`/`DeleteValue` on `HKCU\...\Run` directly — without mocking `winreg`, despite its "hermetic round-trip" docstring. Every `pytest` run therefore wrote and then *deleted* the real `token-goat-worker` autostart entry, so `token-goat doctor` reported `NOT INSTALLED` after any test run (which looked like an autostart bug but was the tests eating their own machine's registry). A new `isolate_registry` autouse fixture replaces `winreg` with an in-memory fake for the whole suite, so no test — present or future — can touch the real registry.
81
+ - **The worker had no autostart after `uv tool install --reinstall`.** The HKCU Run key that launches the worker at logon was only ever written by `token-goat install`; a `uv tool install --reinstall` — the normal way to deploy code changes — never touches it, and nothing else does either. Once the key was absent or cleared, the worker survived only as long as a Claude Code hook kept respawning it, and never came back after a reboot. `run_daemon` now self-registers the Run key on every startup (the claim-winning worker only), so autostart is self-healing and the registered command stays current. Fail-soft: a registry error is logged and ignored, never crashing the worker.
82
+ - **A worker that crashed during startup left no trace.** `spawn_detached` wired the spawned worker's stderr to `DEVNULL`, so any failure before the logging `FileHandler` was attached — an import error, a crash in `_setup_logging` — vanished completely, which is what made silent worker deaths impossible to diagnose. The worker's stderr now goes to `logs/worker-stderr.log`. The console `StreamHandler` — pointless for a detached daemon with no console, and now just routine-log noise in that file — is dropped for non-interactive runs, so the crash log captures only genuine escaped tracebacks.
83
+ - **The image cache missed for re-used images.** `image_shrink._cache_key` hashed `(absolute_path, mtime, size)`, so the cache entry was tied to one exact path at one exact mtime. Claude Code stages prompt-attached images to a fresh temp filename every prompt — so the same image re-used across prompts, or even referenced twice in one prompt, was re-shrunk from scratch each time and stored as a separate cache file. The key is now the sha256 of the image's *content*: identical bytes share one cache entry regardless of path, a re-used image is a cache hit, and a bare mtime touch no longer invalidates the entry while a real content change still does.
84
+ - **The first edit in a never-indexed project was silently dropped.** When the worker drained the dirty queue and the project's hash was not yet in `global.db` — the normal state for a project edited before it was ever indexed — `_process_dirty_entries` logged `dirty queue refers to unknown project hash` and discarded the entry. Nothing else triggered an initial index, so the edit was lost and the project stayed unindexed. The dirty-queue entry now carries `project_root` and `project_marker`, making it self-sufficient: on an unknown hash the worker reconstructs the project from the entry and runs a first full index (which self-registers it) instead of dropping the edit. Legacy entries with no recorded root are still dropped, but now with an explicit reason in the log.
85
+ - **A stray `.git` could make an entire directory of repos index as one project.** `find_project` walks up looking for a project marker; an accidental `git init` at a container directory (e.g. `C:\Projects` holding a dozen unrelated checkouts) made it return the whole supertree, and everything underneath indexed as a single giant project. `find_project` now skips a candidate root that looks like a *container* of repos — three or more immediate child directories with their own `.git` — and keeps walking up. A real project, including a monorepo whose packages share one root `.git`, does not match the container signature. This was the environmental trigger behind the field report of the mid-index-drop bug above.
86
+ - **Dirty-queue drain dropped entries appended mid-drain.** `drain_dirty_queue` read `dirty.txt` and then truncated it; a `post_edit` hook calling `enqueue_dirty` in the window between the read and the truncate had its line truncated away, so that file was never reindexed. The drain now atomically renames `dirty.txt` to a private `.draining` file before reading it — a concurrent append either travels in `.draining` or lands in a fresh `dirty.txt` for the next cycle, and can never be lost. A `.draining` file left behind by a worker that crashed mid-drain is recovered on the next call.
87
+ - **A reinstalled worker kept running stale code.** `uv tool install --reinstall` replaces the on-disk package but cannot touch an already-running worker process, so the daemon kept executing the old code until something external restarted it. The daemon now checks the installed version once a minute and, on a change, releases its single-worker slot and respawns — the successor loads the new code fresh from disk and claims the slot cleanly.
88
+ - **Stale `.indexing` spawn markers were never reaped.** `spawn_index_detached` writes a `locks/{hash}.indexing` marker and treats a present, *active* marker as "an index is already running" — but the marker was only ever cleared implicitly, via the PID-liveness + TTL check in `_index_spawn_active`. A marker whose indexer finished or crashed without its PID being recycled lingered on disk indefinitely (16 were found in the field). The worker's `cleanup_on_startup` — run on startup and every maintenance cycle — now reaps them with the exact predicate `spawn_index_detached` uses, so it can never remove a marker still doing its job.
89
+ - `post_edit` hook was registered but never called any session-tracking logic. It now records file edits, which feeds both the compaction manifest and future session-aware features.
90
+ - Double `@fail_soft` decorator on `post_edit` (applied twice, causing the decorator to wrap itself). Reduced to a single application.
91
+ - **Incremental reindex never ran for normal projects.** `post_edit` recorded edits to the session cache but never appended them to the dirty queue, and `enqueue_dirty()` — the function meant to do this — was defined but called from nowhere. The entire incremental-reindex path was dead code for git-detected projects: a project's symbol index went stale the moment you edited a file, so `token-goat read "file::symbol"` returned the wrong function body and the pre-read hint showed stale line numbers. `post_edit` now resolves the edited file's project and enqueues it; the worker drains and reindexes within ~2 s.
92
+ - **Runaway `index --full` pileup.** `spawn_index_detached` (called by every `SessionStart` hook) had no deduplication. Its `file_count == 0` guard was racy — concurrent indexers contended on the 30 s writer lock, timed out, exited *without writing*, so `file_count` stayed 0 and the next session spawned yet another. Observed in the field as 44 concurrent processes holding ~41 GB of paged memory. The spawn is now idempotent via a per-project marker (PID + timestamp, with a TTL and PID-liveness check).
93
+ - **Duplicate worker daemons.** `run_daemon`'s `is_worker_alive()` → `_write_pid()` sequence was a check-then-act race; two workers starting in the same window both passed the check and both ran the main loop, draining the same dirty queue. Replaced with an atomic `os.open(O_CREAT | O_EXCL)` claim keyed on the process's create-time, so exactly one worker can hold the slot and a crashed worker's claim is correctly reclaimed.
94
+ - **Deleted files lingered in the index forever.** `index_project` walked the files on disk but never pruned rows for files that had been removed or renamed. It now prunes them after indexing (the foreign-key cascade cleans up the file's symbols, refs, sections, and chunks).
95
+ - **Every token-goat command crashed under Codex's unelevated sandbox.** The sandbox cannot create the WAL shared-memory file, so `PRAGMA journal_mode = WAL` and the first real query failed with `unable to open database file`. `_connect()` and `_connect_readonly()` now fall back to an immutable read-only connection that bypasses WAL coordination entirely; schema-ensure and `record_stat` tolerate read-only connections; `conn.close()` errors in `finally` blocks are suppressed (the WAL checkpoint on close also fails); and the hook logger falls back to a `NullHandler` when the log directory is read-only. Fallback notices are logged at `INFO` so CLI and hook stderr stay clean.
96
+ - **`token-goat stats` overstated savings.** The pre-read hook recorded a `session_hint` saving for *every* hint it emitted — including pure suggestions like "this file is large, consider `token-goat read`" — at a flat "25 % of the file" estimate, whether or not the agent acted on it. Hints now carry the genuine avoided cost: suggestion hints record nothing (if followed, `token-goat read` records the real `read_replacement` saving itself), and only dedup hints that warn about re-reading already-cached content record a saving, sized to the actual overlapping lines.
97
+ - **A worker that crashed or hung mid-session was never replaced until the next session.** `SessionStart` starts the worker, but nothing noticed a death *during* a session — the dirty queue would silently stop draining. The `post_edit` hook (which feeds the queue) now runs a cheap mid-session watchdog: a single `stat()` on the heartbeat file, and only on the rare stale path does it import `worker` and call `ensure_running()`. `ensure_running()` itself now distinguishes a crashed worker (process gone — respawn), a hung worker (alive but heartbeat stale beyond any plausible busy period — reap, then respawn), and a merely-busy worker (alive, moderately stale — left untouched, since a duplicate would just lose the claim race and clearing its pid file would orphan it). Hung-worker reaping verifies the process command line first, so a recycled PID is never killed.
98
+
99
+ ## [0.2.0] - 2026-05-12
100
+
101
+ ### Added
102
+
103
+ - Session hint events in `token-goat stats`. When the agent tries to re-read a file already pulled into the current session, Token-Goat now records the savings estimate alongside the existing reminder. The hints show up in the stats output next to image-shrink and read-replacement counts.
104
+ - Automatic first-time indexing at session start. The first time Token-Goat sees a new project, it kicks off a background symbol index so the next `token-goat symbol`, `token-goat read`, and `token-goat section` calls return data instead of an empty result.
105
+ - "Project not yet indexed" hint in `token-goat symbol`, `ref`, `read`, and `section`. The old response was "No matches", which made it look like Token-Goat was broken when the index was still warming up.
106
+ - Token-Goat logo (`assets/logo.png`) and a Windows multi-size icon (`assets/token-goat.ico`). README now opens with the logo centered.
107
+ - Availability line in the README footer for engineering inquiries.
108
+
109
+ ### Changed
110
+
111
+ - Hook commands and the worker auto-start command now invoke `pythonw.exe -m token_goat.cli ...` directly from Token-Goat's uv tool venv. The previous launcher .exe approach tripped behavioral heuristics in several major antivirus and EDR products; the signed Python interpreter plus module invocation does not. See Security below.
112
+ - `token-goat stats` redesigned. A one-line headline summary at the top, unicode bar charts proportional to bytes saved, and separate breakdowns by event kind, day, and project below.
113
+ - Image-shrink events now include a token-savings estimate at one token per four bytes saved, so the headline counter reflects token impact and not just bytes on disk.
114
+ - License changed from MIT to PolyForm Noncommercial 1.0.0. Token-Goat stays free for personal and noncommercial use; commercial use requires a separate license. See LICENSE for full terms.
115
+ - CLAUDE.md, Codex AGENTS.md, and SKILL.md directives sharpened. Imperative phrasing, before-and-after tables that show the token-cost difference between `token-goat symbol` and `grep`, and a verification cue at the bottom.
116
+ - Python version pin widened to support 3.14.
117
+ - Continuous integration now runs `mypy` alongside `ruff` and `pytest`.
118
+
119
+ ### Fixed
120
+
121
+ - "hook exited with code 1" errors in Codex and Claude Code. Hook entry points now accept and discard unknown arguments, catch every exception class including `SystemExit`, and always exit zero with valid JSON on stdout, even when the harness passes arguments the typer entry point did not expect.
122
+ - Database integrity check no longer treats a locked or busy SQLite file as corruption. The previous behavior tried to quarantine the file, failed because Windows held the file lock, and surfaced as `token-goat map` or `token-goat stats` exiting 1.
123
+ - Test runs no longer write to the production hook log file. An autouse fixture isolates the hook logger for the duration of each test.
124
+ - `read_payload` coerces non-dict JSON (`null`, lists, scalars) to an empty dict so hook handlers can safely call `payload.get(...)` regardless of what the harness sends on stdin.
125
+ - Pillow `Image.LANCZOS` replaced with `Image.Resampling.LANCZOS` to remove the deprecation warning on Pillow 10 and newer.
126
+ - Rust and Go extractor error fallbacks now return the four-tuple the extractor protocol requires. The previous three-tuple return crashed downstream and was caught by fail-soft, so Go and Rust files never indexed when extraction failed.
127
+ - Variable-name shadowing in `embeddings.py` chunk extraction. Caught by mypy; it was not a runtime bug, but the code is cleaner now.
128
+
129
+ ### Security
130
+
131
+ - Hook and worker spawn pattern reworked so antivirus and EDR products do not behavior-flag Token-Goat. The previous design spawned a small PyInstaller-style launcher .exe from a user-writable directory (`~/.local/bin/`), which matched the textbook payload-drop signature those products monitor for. Hooks now invoke the Python Software Foundation signed `pythonw.exe` from Token-Goat's uv tool venv directly, with `-m token_goat.cli`. This is the most boring spawn pattern on Windows and gets treated as benign by Bitdefender, Defender, Norton, McAfee, Kaspersky, Sophos, and ESET.
132
+
133
+ ## [0.1.0] - 2026-05-12
134
+
135
+ First public release.
136
+
137
+ ### Added
138
+
139
+ - Image shrinking on local file reads. When the agent opens a large PNG or JPEG, Token-Goat returns a compressed copy in place of the original. A 3.3 MB screenshot from one test session arrived at 84 KB.
140
+ - Image shrinking on Google Drive image downloads. Activates only when the user has already authorized Google Drive through Claude Code's built-in connector. Token-Goat never asks for its own Drive auth.
141
+ - Session-aware read hints. When the agent tries to read a file already pulled into the current session, it gets a short reminder of the prior read and a nudge to grab a narrower slice instead.
142
+ - Targeted symbol reads via `token-goat read "file.py::function_name"`. Pulls one function or class, not the whole file.
143
+ - Targeted section reads via `token-goat section "doc.md::Heading"`. Pulls one Markdown section by heading.
144
+ - Semantic search via `token-goat semantic "<query>"`. Find code by meaning, not by filename. First call downloads a small embedding model into `%LOCALAPPDATA%\dfk-helper\token-goat\models\`.
145
+ - Repo orientation via `token-goat map`. A compact, ranked overview of the most important files in a repository.
146
+ - Cumulative savings tracking via `token-goat stats`.
147
+ - Install and uninstall flow for Claude Code, with a `--codex` flag to patch Codex CLI in the same pass.
148
+ - Diagnostic command `token-goat doctor` confirms the install is healthy.
149
+ - Background worker that auto-starts at logon, runs without a console window, and survives reboots.
150
+
151
+ ### Notes
152
+
153
+ - Licensed under PolyForm Noncommercial 1.0.0. See LICENSE for full terms.
154
+ - Windows 10 and 11 only.
155
+ - Python 3.11, 3.12, 3.13, and 3.14 supported.
@@ -0,0 +1,145 @@
1
+ # PolyForm Noncommercial License 1.0.0
2
+
3
+ Required Notice: Copyright (c) 2026 DFK Helper LLC. Contact: token-goat@dfkhelper.com
4
+
5
+ <https://polyformproject.org/licenses/noncommercial/1.0.0>
6
+
7
+ ## Acceptance
8
+
9
+ In order to get any license under these terms, you must agree
10
+ to them as both strict obligations and conditions to all
11
+ your licenses.
12
+
13
+ ## Copyright License
14
+
15
+ The licensor grants you a copyright license for the
16
+ software to do everything you might do with the software
17
+ that would otherwise infringe the licensor's copyright
18
+ in it for any permitted purpose. However, you may
19
+ only distribute the software according to [Distribution
20
+ License](#distribution-license) and make changes or new works
21
+ based on the software according to [Changes and New Works
22
+ License](#changes-and-new-works-license).
23
+
24
+ ## Distribution License
25
+
26
+ The licensor grants you an additional copyright license
27
+ to distribute copies of the software. Your license
28
+ to distribute covers distributing the software with
29
+ changes and new works permitted by [Changes and New Works
30
+ License](#changes-and-new-works-license).
31
+
32
+ ## Notices
33
+
34
+ You must ensure that anyone who gets a copy of any part of
35
+ the software from you also gets a copy of these terms or the
36
+ URL for them above, as well as copies of any plain-text lines
37
+ beginning with `Required Notice:` that the licensor provided
38
+ with the software. For example:
39
+
40
+ > Required Notice: Copyright (c) 2026 DFK Helper LLC. Contact: token-goat@dfkhelper.com
41
+
42
+ ## Changes and New Works License
43
+
44
+ The licensor grants you an additional copyright license to
45
+ make changes and new works based on the software for any
46
+ permitted purpose.
47
+
48
+ ## Patent License
49
+
50
+ The licensor grants you a patent license for the software that
51
+ covers patent claims the licensor can license, or becomes able
52
+ to license, that you would infringe by using the software.
53
+
54
+ ## Noncommercial Purposes
55
+
56
+ Any noncommercial purpose is a permitted purpose.
57
+
58
+ ## Personal Uses
59
+
60
+ Personal use for research, experiment, and testing for
61
+ the benefit of public knowledge, personal study, private
62
+ entertainment, hobby projects, amateur pursuits, or religious
63
+ observance, without any anticipated commercial application,
64
+ is use for a permitted purpose.
65
+
66
+ ## Noncommercial Organizations
67
+
68
+ Use by any charitable organization, educational institution,
69
+ public research organization, public safety or health
70
+ organization, environmental protection organization,
71
+ or government institution is use for a permitted purpose
72
+ regardless of the source of funding or obligations resulting
73
+ from the funding.
74
+
75
+ ## Fair Use
76
+
77
+ You may have "fair use" rights for the software under the
78
+ law. These terms do not limit them.
79
+
80
+ ## No Other Rights
81
+
82
+ These terms do not allow you to sublicense or transfer any of
83
+ your licenses to anyone else, or prevent the licensor from
84
+ granting licenses to anyone else. These terms do not imply
85
+ any other licenses.
86
+
87
+ ## Patent Defense
88
+
89
+ If you make any written claim that the software infringes or
90
+ contributes to infringement of any patent, your patent license
91
+ for the software granted under these terms ends immediately.
92
+ If your company makes such a claim, your patent license ends
93
+ immediately for work on behalf of your company.
94
+
95
+ ## Violations
96
+
97
+ The first time you are notified in writing that you have
98
+ violated any of these terms, or done anything with the software
99
+ not covered by your licenses, your licenses can nonetheless
100
+ continue if you come into full compliance with these terms,
101
+ and take practical steps to correct past violations, within
102
+ 32 days of receiving notice. Otherwise, all your licenses
103
+ end immediately.
104
+
105
+ ## Additional Use Grant
106
+
107
+ Individual developers may install and use the software on their
108
+ own machines for individual productivity purposes without a
109
+ commercial license, provided such use does not involve:
110
+ (a) providing the software as a service to third parties;
111
+ (b) incorporating the software or its outputs into a commercial
112
+ product or platform; or (c) deploying the software as shared
113
+ infrastructure across a team or organization. Employment at a
114
+ for-profit company does not by itself constitute commercial use
115
+ under this grant.
116
+
117
+ ## No Liability
118
+
119
+ ***As far as the law allows, the software comes as is, without
120
+ any warranty or condition, and the licensor will not be liable
121
+ to you for any damages arising out of these terms or the use
122
+ or nature of the software, under any kind of legal claim.***
123
+
124
+ ## Definitions
125
+
126
+ The **licensor** is the individual or entity offering these
127
+ terms, and the **software** is the software the licensor makes
128
+ available under these terms.
129
+
130
+ **You** refers to the individual or entity agreeing to these
131
+ terms.
132
+
133
+ **Your company** is any legal entity, sole proprietorship,
134
+ or other kind of organization that you work for, plus all
135
+ organizations that have control over, are under the control of,
136
+ or are under common control with that organization. **Control**
137
+ means ownership of substantially all the assets of an entity,
138
+ or the power to direct its management and policies by vote,
139
+ contract, or otherwise. Control can be direct or indirect.
140
+
141
+ **Your licenses** are all the licenses granted to you for the
142
+ software under these terms.
143
+
144
+ **Use** means anything you do with the software requiring one
145
+ of your licenses.