tokenfit 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenfit-1.0.0/LICENSE +21 -0
- tokenfit-1.0.0/PKG-INFO +223 -0
- tokenfit-1.0.0/README.md +171 -0
- tokenfit-1.0.0/pyproject.toml +3 -0
- tokenfit-1.0.0/setup.cfg +4 -0
- tokenfit-1.0.0/setup.py +66 -0
- tokenfit-1.0.0/tests/test_pipeline.py +63 -0
- tokenfit-1.0.0/tokenfit/__init__.py +9 -0
- tokenfit-1.0.0/tokenfit/budget.py +34 -0
- tokenfit-1.0.0/tokenfit/cli.py +207 -0
- tokenfit-1.0.0/tokenfit/eval/__init__.py +0 -0
- tokenfit-1.0.0/tokenfit/eval/dataset/__init__.py +0 -0
- tokenfit-1.0.0/tokenfit/eval/dataset/questions.yaml +18 -0
- tokenfit-1.0.0/tokenfit/eval/harness.py +189 -0
- tokenfit-1.0.0/tokenfit/index.py +61 -0
- tokenfit-1.0.0/tokenfit/ingest.py +183 -0
- tokenfit-1.0.0/tokenfit/models.py +75 -0
- tokenfit-1.0.0/tokenfit/pack.py +87 -0
- tokenfit-1.0.0/tokenfit/retrieve.py +26 -0
- tokenfit-1.0.0/tokenfit.egg-info/PKG-INFO +223 -0
- tokenfit-1.0.0/tokenfit.egg-info/SOURCES.txt +23 -0
- tokenfit-1.0.0/tokenfit.egg-info/dependency_links.txt +1 -0
- tokenfit-1.0.0/tokenfit.egg-info/entry_points.txt +3 -0
- tokenfit-1.0.0/tokenfit.egg-info/requires.txt +15 -0
- tokenfit-1.0.0/tokenfit.egg-info/top_level.txt +1 -0
tokenfit-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Shubham Divakar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tokenfit-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokenfit
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Fit your whole repo into any small model's token window — context selection for free/small LLMs.
|
|
5
|
+
Home-page: https://github.com/shubham10divakar/tokenfit
|
|
6
|
+
Author: Shubham Divakar
|
|
7
|
+
Author-email: shubham.divakar@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Source, https://github.com/shubham10divakar/tokenfit
|
|
10
|
+
Project-URL: Issues, https://github.com/shubham10divakar/tokenfit/issues
|
|
11
|
+
Project-URL: Examples, https://github.com/shubham10divakar/tokenfit/blob/main/EXAMPLES.md
|
|
12
|
+
Keywords: llm,rag,context,huggingface,coding-agent,retrieval
|
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: huggingface_hub>=0.25.0
|
|
27
|
+
Requires-Dist: transformers>=4.44.0
|
|
28
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
29
|
+
Requires-Dist: numpy>=1.24.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Provides-Extra: hybrid
|
|
32
|
+
Requires-Dist: rank-bm25>=0.2.2; extra == "hybrid"
|
|
33
|
+
Provides-Extra: chroma
|
|
34
|
+
Requires-Dist: chromadb>=0.5.0; extra == "chroma"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
37
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
38
|
+
Dynamic: author
|
|
39
|
+
Dynamic: author-email
|
|
40
|
+
Dynamic: classifier
|
|
41
|
+
Dynamic: description
|
|
42
|
+
Dynamic: description-content-type
|
|
43
|
+
Dynamic: home-page
|
|
44
|
+
Dynamic: keywords
|
|
45
|
+
Dynamic: license
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
Dynamic: project-url
|
|
48
|
+
Dynamic: provides-extra
|
|
49
|
+
Dynamic: requires-dist
|
|
50
|
+
Dynamic: requires-python
|
|
51
|
+
Dynamic: summary
|
|
52
|
+
|
|
53
|
+
# tokenfit
|
|
54
|
+
|
|
55
|
+
> **Fit your whole repo into any small model's token window.**
|
|
56
|
+
|
|
57
|
+
`tokenfit` is a **context-selection pre-processor** for free / small LLMs. Point it at
|
|
58
|
+
your project's markdown + code, ask a question, and it returns the *most relevant* slice
|
|
59
|
+
of your codebase — packed to fit a tight token budget — so a 7B model with an 8k window
|
|
60
|
+
answers as if it read the whole repo.
|
|
61
|
+
|
|
62
|
+
[PyPI version](https://pypi.org/project/tokenfit/)
|
|
63
|
+
[Downloads](https://pepy.tech/project/tokenfit)
|
|
64
|
+
[PyPI package](https://pypi.org/project/tokenfit/)
|
|
65
|
+
[License: MIT](./LICENSE)
|
|
66
|
+
[Python](https://www.python.org/)
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Why
|
|
71
|
+
|
|
72
|
+
GitHub Copilot moved to usage-based token billing (June 2026), pushing developers toward
|
|
73
|
+
cheap open-source models on HuggingFace. But free/small models have **tiny context
|
|
74
|
+
windows** — dump your whole repo at them and they choke or truncate.
|
|
75
|
+
|
|
76
|
+
Existing tools (`tiny-agents`, `AGENTS.md`, `SKILL.md`) inject context *raw*. tokenfit is
|
|
77
|
+
the missing **retrieval layer** that makes those models punch above their weight. It's a
|
|
78
|
+
pre-processor: it builds the optimal prompt, then hands it to your model or agent
|
|
79
|
+
framework — it does **not** trust a weak model to call a retrieval tool correctly.
|
|
80
|
+
|
|
81
|
+
## How it works
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
query
|
|
85
|
+
│
|
|
86
|
+
▼
|
|
87
|
+
1. INGEST load AGENTS.md / SKILL.md / docs / code → chunk
|
|
88
|
+
2. INDEX embed chunks (BAAI/bge-small, local) → persist
|
|
89
|
+
3. RETRIEVE cosine top-k semantic search
|
|
90
|
+
4. BUDGET tokenizer-aware fit to N tokens + citations
|
|
91
|
+
│
|
|
92
|
+
▼
|
|
93
|
+
optimal prompt → any HuggingFace model
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Does it actually beat just dumping the files? Yes.
|
|
97
|
+
|
|
98
|
+
We ran the free **Qwen2.5-Coder-7B** against [`psf/requests`](https://github.com/psf/requests)
|
|
99
|
+
— **~150,000 tokens** of code, ~19× bigger than an 8000-token budget — comparing two ways
|
|
100
|
+
of feeding the model, across 10 questions (`tokenfit eval --compare`):
|
|
101
|
+
|
|
102
|
+
- **Naive** — concatenate the files and truncate to 8000 tokens.
|
|
103
|
+
- **Retrieved** — let tokenfit pick the relevant ~2000 tokens.
|
|
104
|
+
|
|
105
|
+
| | Naive (8000 tok) | **tokenfit retrieved (~2000 tok)** |
|
|
106
|
+
|---|---|---|
|
|
107
|
+
| Wins (of 10) | 1 (1 tie) | **9** |
|
|
108
|
+
| Cites the right source file | rarely | **almost always** |
|
|
109
|
+
| Tokens per call | 8000 | **~2000 (≈4× cheaper)** |
|
|
110
|
+
| Failure modes | "context doesn't provide info", quoted the changelog, once **answered in Chinese**, once **invented a class that doesn't exist** | accurate, code-grounded answers |
|
|
111
|
+
|
|
112
|
+
**Why naive collapses:** the whole 8000-token budget filled up with `HISTORY.md` (the
|
|
113
|
+
changelog) and never reached a single source file. tokenfit semantically skips the noise
|
|
114
|
+
and fetches the right module — so it's **both more accurate _and_ ~4× cheaper per call.**
|
|
115
|
+
|
|
116
|
+
> 📂 Full side-by-side transcripts in **[EXAMPLES.md](./EXAMPLES.md)**.
|
|
117
|
+
|
|
118
|
+
## Install
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
pip install tokenfit
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Set a HuggingFace token with **"Make calls to Inference Providers"** permission:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
export HF_TOKEN=hf_your_token_here # bash
|
|
128
|
+
$env:HF_TOKEN = "hf_your_token_here" # PowerShell
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Verify it before you run anything:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
tokenfit auth # checks the token is set and valid
|
|
135
|
+
tokenfit auth --ping # also makes a 1-token call to confirm inference access
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Quickstart (CLI)
|
|
139
|
+
|
|
140
|
+
The fastest way — no Python required:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Ask a question: tokenfit retrieves the right context AND gets the model's answer
|
|
144
|
+
tokenfit ask "How does the auth flow work?" --repo ./my-project
|
|
145
|
+
|
|
146
|
+
# Just print the selected context (no model call, pipe it anywhere)
|
|
147
|
+
tokenfit context "auth flow" --repo ./my-project
|
|
148
|
+
|
|
149
|
+
# Pre-build / refresh the index for a repo
|
|
150
|
+
tokenfit index --repo ./my-project --rebuild
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Useful flags: `--budget 8000` (token budget), `--top-k 12` (chunks retrieved),
|
|
154
|
+
`--model Qwen/Qwen2.5-Coder-7B-Instruct` (any HF model), `--rebuild` (re-index).
|
|
155
|
+
Progress prints to stderr, so the answer/context on stdout stays clean for piping.
|
|
156
|
+
|
|
157
|
+
tokenfit indexes common source + doc file types out of the box (Python, JS/TS, Go,
|
|
158
|
+
Rust, Java, C#, C/C++, Ruby, PHP, Swift, GDScript, shell, plus md/yaml/toml/json…).
|
|
159
|
+
Indexing a different language? Add globs with `--include`:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
tokenfit ask "How does combat work?" --repo ./my-godot-game --include "*.gd" --rebuild
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
📂 **See [EXAMPLES.md](./EXAMPLES.md)** for real output — a free 7B model explaining a
|
|
166
|
+
Godot game's movement code, grounded in the actual source.
|
|
167
|
+
|
|
168
|
+
## Quickstart (Python)
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from tokenfit import pack
|
|
172
|
+
from tokenfit.models import TokenfitModel
|
|
173
|
+
|
|
174
|
+
# Select the best ~8k tokens of context for a question
|
|
175
|
+
context = pack.build(
|
|
176
|
+
query="How does the auth flow work?",
|
|
177
|
+
repo="./my-project",
|
|
178
|
+
budget=8000,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
# Feed it to any small HF model
|
|
182
|
+
model = TokenfitModel(model="Qwen/Qwen2.5-Coder-7B-Instruct")
|
|
183
|
+
answer = model.chat(
|
|
184
|
+
system="You are a coding assistant for THIS project. Use only the provided context.",
|
|
185
|
+
user=f"{context}\n\nQUESTION: How does the auth flow work?",
|
|
186
|
+
)
|
|
187
|
+
print(answer)
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Validation harness
|
|
191
|
+
|
|
192
|
+
tokenfit ships with an eval harness that compares **naive truncation** vs **retrieved
|
|
193
|
+
context** on your own repo — the experiment that proves the approach is worth it:
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
tokenfit eval --repo ./my-project --mode naive
|
|
197
|
+
tokenfit eval --repo ./my-project --mode retrieved
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Each run writes a graded comparison sheet to `tokenfit/eval/results/`. Score the answers
|
|
201
|
+
1–5 and compare. Edit `tokenfit/eval/dataset/questions.yaml` to fit your project.
|
|
202
|
+
|
|
203
|
+
## Roadmap
|
|
204
|
+
|
|
205
|
+
- [x] **Phase 0** — eval harness + naive baseline
|
|
206
|
+
- [x] **Phase 1** — semantic retrieval (chunk → embed → retrieve → budget)
|
|
207
|
+
- [ ] **Phase 2** — hybrid BM25 + rerank + summarization for oversized chunks
|
|
208
|
+
- [ ] **Phase 3** — `tiny-agents` / `smolagents` adapters, optional Chroma backend
|
|
209
|
+
|
|
210
|
+
See [`idea.md`](./idea.md) for the rationale and [`plan.md`](./plan.md) for the full plan.
|
|
211
|
+
|
|
212
|
+
## Development
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
git clone https://github.com/shubham10divakar/tokenfit
|
|
216
|
+
cd tokenfit
|
|
217
|
+
pip install -e ".[dev]"
|
|
218
|
+
python -m tests.test_pipeline # regression test (fake embedder: no torch, sentence-transformers, or network needed)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## License
|
|
222
|
+
|
|
223
|
+
MIT — see [LICENSE](./LICENSE).
|
tokenfit-1.0.0/README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# tokenfit
|
|
2
|
+
|
|
3
|
+
> **Fit your whole repo into any small model's token window.**
|
|
4
|
+
|
|
5
|
+
`tokenfit` is a **context-selection pre-processor** for free / small LLMs. Point it at
|
|
6
|
+
your project's markdown + code, ask a question, and it returns the *most relevant* slice
|
|
7
|
+
of your codebase — packed to fit a tight token budget — so a 7B model with an 8k window
|
|
8
|
+
answers as if it read the whole repo.
|
|
9
|
+
|
|
10
|
+
[PyPI version](https://pypi.org/project/tokenfit/)
|
|
11
|
+
[Downloads](https://pepy.tech/project/tokenfit)
|
|
12
|
+
[PyPI package](https://pypi.org/project/tokenfit/)
|
|
13
|
+
[License: MIT](./LICENSE)
|
|
14
|
+
[Python](https://www.python.org/)
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Why
|
|
19
|
+
|
|
20
|
+
GitHub Copilot moved to usage-based token billing (June 2026), pushing developers toward
|
|
21
|
+
cheap open-source models on HuggingFace. But free/small models have **tiny context
|
|
22
|
+
windows** — dump your whole repo at them and they choke or truncate.
|
|
23
|
+
|
|
24
|
+
Existing tools (`tiny-agents`, `AGENTS.md`, `SKILL.md`) inject context *raw*. tokenfit is
|
|
25
|
+
the missing **retrieval layer** that makes those models punch above their weight. It's a
|
|
26
|
+
pre-processor: it builds the optimal prompt, then hands it to your model or agent
|
|
27
|
+
framework — it does **not** trust a weak model to call a retrieval tool correctly.
|
|
28
|
+
|
|
29
|
+
## How it works
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
query
|
|
33
|
+
│
|
|
34
|
+
▼
|
|
35
|
+
1. INGEST load AGENTS.md / SKILL.md / docs / code → chunk
|
|
36
|
+
2. INDEX embed chunks (BAAI/bge-small, local) → persist
|
|
37
|
+
3. RETRIEVE cosine top-k semantic search
|
|
38
|
+
4. BUDGET tokenizer-aware fit to N tokens + citations
|
|
39
|
+
│
|
|
40
|
+
▼
|
|
41
|
+
optimal prompt → any HuggingFace model
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Does it actually beat just dumping the files? Yes.
|
|
45
|
+
|
|
46
|
+
We ran the free **Qwen2.5-Coder-7B** against [`psf/requests`](https://github.com/psf/requests)
|
|
47
|
+
— **~150,000 tokens** of code, ~19× bigger than an 8000-token budget — comparing two ways
|
|
48
|
+
of feeding the model, across 10 questions (`tokenfit eval --compare`):
|
|
49
|
+
|
|
50
|
+
- **Naive** — concatenate the files and truncate to 8000 tokens.
|
|
51
|
+
- **Retrieved** — let tokenfit pick the relevant ~2000 tokens.
|
|
52
|
+
|
|
53
|
+
| | Naive (8000 tok) | **tokenfit retrieved (~2000 tok)** |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| Wins (of 10) | 1 (1 tie) | **9** |
|
|
56
|
+
| Cites the right source file | rarely | **almost always** |
|
|
57
|
+
| Tokens per call | 8000 | **~2000 (≈4× cheaper)** |
|
|
58
|
+
| Failure modes | "context doesn't provide info", quoted the changelog, once **answered in Chinese**, once **invented a class that doesn't exist** | accurate, code-grounded answers |
|
|
59
|
+
|
|
60
|
+
**Why naive collapses:** the whole 8000-token budget filled up with `HISTORY.md` (the
|
|
61
|
+
changelog) and never reached a single source file. tokenfit semantically skips the noise
|
|
62
|
+
and fetches the right module — so it's **both more accurate _and_ ~4× cheaper per call.**
|
|
63
|
+
|
|
64
|
+
> 📂 Full side-by-side transcripts in **[EXAMPLES.md](./EXAMPLES.md)**.
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install tokenfit
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Set a HuggingFace token with **"Make calls to Inference Providers"** permission:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
export HF_TOKEN=hf_your_token_here # bash
|
|
76
|
+
$env:HF_TOKEN = "hf_your_token_here" # PowerShell
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Verify it before you run anything:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
tokenfit auth # checks the token is set and valid
|
|
83
|
+
tokenfit auth --ping # also makes a 1-token call to confirm inference access
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Quickstart (CLI)
|
|
87
|
+
|
|
88
|
+
The fastest way — no Python required:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Ask a question: tokenfit retrieves the right context AND gets the model's answer
|
|
92
|
+
tokenfit ask "How does the auth flow work?" --repo ./my-project
|
|
93
|
+
|
|
94
|
+
# Just print the selected context (no model call, pipe it anywhere)
|
|
95
|
+
tokenfit context "auth flow" --repo ./my-project
|
|
96
|
+
|
|
97
|
+
# Pre-build / refresh the index for a repo
|
|
98
|
+
tokenfit index --repo ./my-project --rebuild
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Useful flags: `--budget 8000` (token budget), `--top-k 12` (chunks retrieved),
|
|
102
|
+
`--model Qwen/Qwen2.5-Coder-7B-Instruct` (any HF model), `--rebuild` (re-index).
|
|
103
|
+
Progress prints to stderr, so the answer/context on stdout stays clean for piping.
|
|
104
|
+
|
|
105
|
+
tokenfit indexes common source + doc file types out of the box (Python, JS/TS, Go,
|
|
106
|
+
Rust, Java, C#, C/C++, Ruby, PHP, Swift, GDScript, shell, plus md/yaml/toml/json…).
|
|
107
|
+
Indexing a different language? Add globs with `--include`:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
tokenfit ask "How does combat work?" --repo ./my-godot-game --include "*.gd" --rebuild
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
📂 **See [EXAMPLES.md](./EXAMPLES.md)** for real output — a free 7B model explaining a
|
|
114
|
+
Godot game's movement code, grounded in the actual source.
|
|
115
|
+
|
|
116
|
+
## Quickstart (Python)
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from tokenfit import pack
|
|
120
|
+
from tokenfit.models import TokenfitModel
|
|
121
|
+
|
|
122
|
+
# Select the best ~8k tokens of context for a question
|
|
123
|
+
context = pack.build(
|
|
124
|
+
query="How does the auth flow work?",
|
|
125
|
+
repo="./my-project",
|
|
126
|
+
budget=8000,
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
# Feed it to any small HF model
|
|
130
|
+
model = TokenfitModel(model="Qwen/Qwen2.5-Coder-7B-Instruct")
|
|
131
|
+
answer = model.chat(
|
|
132
|
+
system="You are a coding assistant for THIS project. Use only the provided context.",
|
|
133
|
+
user=f"{context}\n\nQUESTION: How does the auth flow work?",
|
|
134
|
+
)
|
|
135
|
+
print(answer)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Validation harness
|
|
139
|
+
|
|
140
|
+
tokenfit ships with an eval harness that compares **naive truncation** vs **retrieved
|
|
141
|
+
context** on your own repo — the experiment that proves the approach is worth it:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
tokenfit eval --repo ./my-project --mode naive
|
|
145
|
+
tokenfit eval --repo ./my-project --mode retrieved
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Each run writes a graded comparison sheet to `tokenfit/eval/results/`. Score the answers
|
|
149
|
+
1–5 and compare. Edit `tokenfit/eval/dataset/questions.yaml` to fit your project.
|
|
150
|
+
|
|
151
|
+
## Roadmap
|
|
152
|
+
|
|
153
|
+
- [x] **Phase 0** — eval harness + naive baseline
|
|
154
|
+
- [x] **Phase 1** — semantic retrieval (chunk → embed → retrieve → budget)
|
|
155
|
+
- [ ] **Phase 2** — hybrid BM25 + rerank + summarization for oversized chunks
|
|
156
|
+
- [ ] **Phase 3** — `tiny-agents` / `smolagents` adapters, optional Chroma backend
|
|
157
|
+
|
|
158
|
+
See [`idea.md`](./idea.md) for the rationale and [`plan.md`](./plan.md) for the full plan.
|
|
159
|
+
|
|
160
|
+
## Development
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
git clone https://github.com/shubham10divakar/tokenfit
|
|
164
|
+
cd tokenfit
|
|
165
|
+
pip install -e ".[dev]"
|
|
166
|
+
python -m tests.test_pipeline # regression test (fake embedder: no torch, sentence-transformers, or network needed)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT — see [LICENSE](./LICENSE).
|
tokenfit-1.0.0/setup.cfg
ADDED
tokenfit-1.0.0/setup.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Packaging for tokenfit.
|
|
2
|
+
|
|
3
|
+
Build: python -m build (produces wheel + sdist in dist/)
|
|
4
|
+
Install: pip install . (or `pip install -e .` for development)
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from setuptools import find_packages, setup
|
|
10
|
+
|
|
11
|
+
# Anchor on this file's directory so README.md is located relative to setup.py
# rather than to whatever directory the build happens to be launched from.
ROOT = Path(__file__).parent
LONG_DESCRIPTION = (ROOT / "README.md").read_text(encoding="utf-8")

# Short summary passed as the package description.
_SUMMARY = (
    "Fit your whole repo into any small model's token window — "
    "context selection for free/small LLMs."
)

# Extra links passed as project_urls.
_PROJECT_URLS = {
    "Source": "https://github.com/shubham10divakar/tokenfit",
    "Issues": "https://github.com/shubham10divakar/tokenfit/issues",
    "Examples": "https://github.com/shubham10divakar/tokenfit/blob/main/EXAMPLES.md",
}

# Packages installed unconditionally with tokenfit.
_INSTALL_REQUIRES = [
    "huggingface_hub>=0.25.0",
    "transformers>=4.44.0",
    "sentence-transformers>=3.0.0",
    "numpy>=1.24.0",
    "pyyaml>=6.0",
]

# Optional dependency groups, installed via `pip install tokenfit[<name>]`.
_EXTRAS_REQUIRE = {
    # Phase 2 features (hybrid retrieval / scalable store)
    "hybrid": ["rank-bm25>=0.2.2"],
    "chroma": ["chromadb>=0.5.0"],
    "dev": ["pytest>=8.0", "build>=1.2"],
}

# Command-line entry points, each in `command=module:function` form.
_CONSOLE_SCRIPTS = [
    "tokenfit=tokenfit.cli:main",
    "tokenfit-eval=tokenfit.eval.harness:main",
]

_KEYWORDS = ["llm", "rag", "context", "huggingface", "coding-agent", "retrieval"]

_CLASSIFIERS = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

setup(
    name="tokenfit",
    version="1.0.0",
    description=_SUMMARY,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="Shubham Divakar",
    author_email="shubham.divakar@gmail.com",
    url="https://github.com/shubham10divakar/tokenfit",
    project_urls=_PROJECT_URLS,
    license="MIT",
    # Only the tokenfit package tree is shipped; tests/ is not matched by the filter.
    packages=find_packages(include=["tokenfit", "tokenfit.*"]),
    include_package_data=True,
    # Ship the bundled eval question set alongside the code.
    package_data={"tokenfit": ["eval/dataset/*.yaml"]},
    python_requires=">=3.9",
    install_requires=_INSTALL_REQUIRES,
    extras_require=_EXTRAS_REQUIRE,
    entry_points={"console_scripts": _CONSOLE_SCRIPTS},
    keywords=_KEYWORDS,
    classifiers=_CLASSIFIERS,
)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Dependency-free regression test for the Phase 1 pipeline.
|
|
2
|
+
|
|
3
|
+
Uses a fake keyword-based embedder so it runs without torch / sentence-transformers /
|
|
4
|
+
network, yet still exercises chunk -> index -> retrieve -> budget end to end.
|
|
5
|
+
|
|
6
|
+
Run: python -m tests.test_pipeline (or: pytest tests/)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import tempfile
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
|
|
15
|
+
from tokenfit import budget, index, retrieve
|
|
16
|
+
from tokenfit.ingest import Document, chunk_documents
|
|
17
|
+
|
|
18
|
+
_VOCAB = ["auth", "login", "token", "database", "persist", "config", "test", "endpoint"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _fake_embed(texts, model=index.EMBED_MODEL):
    """Embed texts as unit-length keyword-count vectors over ``_VOCAB``.

    Each text is lowercased and turned into one row holding, per vocabulary
    word, the number of times that word occurs as a substring. Rows are
    scaled to unit L2 norm; a row with no vocabulary hits stays all zeros.

    ``model`` is accepted but never used -- presumably it mirrors the
    signature of the real embedder this replaces (confirm against
    ``index.embed_texts``).

    Returns a float32 array with one row per input text.
    """
    rows = []
    for text in texts:
        lowered = text.lower()
        counts = np.array([lowered.count(word) for word in _VOCAB], dtype=np.float32)
        length = np.linalg.norm(counts)
        # Skip normalisation for all-zero rows to avoid dividing by zero.
        if length:
            counts = counts / length
        rows.append(counts)
    return np.vstack(rows).astype(np.float32)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class _FakeModel:
    """Minimal model stub that only knows how to count tokens."""

    def count_tokens(self, text: str) -> int:
        """Estimate tokens as one per four characters, never less than one."""
        approx = len(text) // 4
        return approx if approx > 1 else 1
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_pipeline(monkeypatch=None):
    """Run chunk -> index -> retrieve -> budget end to end with a fake embedder.

    Args:
        monkeypatch: Optional pytest ``monkeypatch`` fixture. When pytest
            supplies it, the fake embedder is installed through the fixture
            so pytest reverts the patch after the test. When it is ``None``
            (the module is run as a script), the patch is applied by hand and
            undone before this function returns, so the fake embedder never
            leaks into other code running in the same process.
    """
    missing = object()  # marks "module had no embed_texts attribute before"
    saved = []
    # patch the embedder in both modules that reference it
    for module in (index, retrieve):
        if monkeypatch is not None:
            # raising=False keeps the original "assign even if absent" semantics.
            monkeypatch.setattr(module, "embed_texts", _fake_embed, raising=False)
        else:
            saved.append((module, getattr(module, "embed_texts", missing)))
            module.embed_texts = _fake_embed

    try:
        docs = [
            Document("auth.py", "def login(user):\n # validate auth token\n return token"),
            Document("db.py", "def save(rec):\n # persist to database\n database.write(rec)"),
            Document("conf.py", "CONFIG = {}\n# load config values here"),
        ]
        chunks = chunk_documents(docs, target_chars=400)
        assert len(chunks) == 3

        with tempfile.TemporaryDirectory() as d:
            index.build_index(chunks, d)
            assert index.index_exists(d)

            hits = retrieve.retrieve("how does login auth token work", d, top_k=3)
            assert hits[0].doc_path == "auth.py"  # semantic ranking works

            packed = budget.fit_to_budget(hits, _FakeModel(), budget=40)
            assert _FakeModel().count_tokens(packed) <= 40  # budget respected
            assert "### FILE: auth.py@" in packed  # citations present
    finally:
        # Script mode only: put back whatever each module had before the test.
        for module, original in saved:
            if original is missing:
                del module.embed_texts
            else:
                module.embed_texts = original


if __name__ == "__main__":
    test_pipeline()
    print("PASSED")
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""tokenfit — context-selection pre-processor for free/small LLMs.
|
|
2
|
+
|
|
3
|
+
Given a developer query + a project corpus (md files, code, vector DB), build the
|
|
4
|
+
optimal token-budgeted context to feed a small HuggingFace model.
|
|
5
|
+
|
|
6
|
+
See plan.md for the phased roadmap.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Budgeting — pack the highest-value chunks into a token window. (Phase 1)
|
|
2
|
+
|
|
3
|
+
Greedy fit by relevance order (chunks arrive already ranked). Phase 2 adds
|
|
4
|
+
summarization of oversized chunks instead of dropping them.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
from tokenfit.ingest import Chunk
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING: # only for type hints; avoids pulling the inference SDK
|
|
14
|
+
from tokenfit.models import TokenfitModel
|
|
15
|
+
|
|
16
|
+
# Template for one packed block: a citation header line, then the chunk text.
_HEADER = "### FILE: {label}\n{text}"


def fit_to_budget(chunks: list[Chunk], model: "TokenfitModel", budget: int) -> str:
    """Greedily pack ranked chunks into at most `budget` tokens.

    Chunks are visited in the order given (callers pass them already ranked).
    Each one is rendered as a ``### FILE: <label>`` header followed by its
    text, and is kept only if its token cost still fits in what is left of
    the budget. A chunk that is too large is passed over rather than ending
    the scan, so a smaller chunk further down the list can still be included.

    Args:
        chunks: Candidate chunks; each must expose ``label`` and ``text``.
        model: Object providing ``count_tokens(text)`` returning an int.
        budget: Upper bound on the summed cost of the kept blocks.

    Returns:
        The kept blocks joined by blank lines, or ``""`` when nothing fits.
    """
    kept: list[str] = []
    spent = 0
    for chunk in chunks:
        rendered = _HEADER.format(label=chunk.label, text=chunk.text)
        # Every block is charged 2 extra tokens for the joining newlines.
        price = model.count_tokens(rendered) + 2
        if spent + price <= budget:
            kept.append(rendered)
            spent += price
    return "\n\n".join(kept)
|