tokenfit 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tokenfit-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Shubham Divakar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,223 @@
1
+ Metadata-Version: 2.4
2
+ Name: tokenfit
3
+ Version: 1.0.0
4
+ Summary: Fit your whole repo into any small model's token window — context selection for free/small LLMs.
5
+ Home-page: https://github.com/shubham10divakar/tokenfit
6
+ Author: Shubham Divakar
7
+ Author-email: shubham.divakar@gmail.com
8
+ License: MIT
9
+ Project-URL: Source, https://github.com/shubham10divakar/tokenfit
10
+ Project-URL: Issues, https://github.com/shubham10divakar/tokenfit/issues
11
+ Project-URL: Examples, https://github.com/shubham10divakar/tokenfit/blob/main/EXAMPLES.md
12
+ Keywords: llm,rag,context,huggingface,coding-agent,retrieval
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
+ Requires-Python: >=3.9
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: huggingface_hub>=0.25.0
27
+ Requires-Dist: transformers>=4.44.0
28
+ Requires-Dist: sentence-transformers>=3.0.0
29
+ Requires-Dist: numpy>=1.24.0
30
+ Requires-Dist: pyyaml>=6.0
31
+ Provides-Extra: hybrid
32
+ Requires-Dist: rank-bm25>=0.2.2; extra == "hybrid"
33
+ Provides-Extra: chroma
34
+ Requires-Dist: chromadb>=0.5.0; extra == "chroma"
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=8.0; extra == "dev"
37
+ Requires-Dist: build>=1.2; extra == "dev"
38
+ Dynamic: author
39
+ Dynamic: author-email
40
+ Dynamic: classifier
41
+ Dynamic: description
42
+ Dynamic: description-content-type
43
+ Dynamic: home-page
44
+ Dynamic: keywords
45
+ Dynamic: license
46
+ Dynamic: license-file
47
+ Dynamic: project-url
48
+ Dynamic: provides-extra
49
+ Dynamic: requires-dist
50
+ Dynamic: requires-python
51
+ Dynamic: summary
52
+
53
+ # tokenfit
54
+
55
+ > **Fit your whole repo into any small model's token window.**
56
+
57
+ `tokenfit` is a **context-selection pre-processor** for free / small LLMs. Point it at
58
+ your project's markdown + code, ask a question, and it returns the *most relevant* slice
59
+ of your codebase — packed to fit a tight token budget — so a 7B model with an 8k window
60
+ answers as if it read the whole repo.
61
+
62
+ [![PyPI version](https://img.shields.io/pypi/v/tokenfit)](https://pypi.org/project/tokenfit/)
63
+ [![Downloads](https://static.pepy.tech/badge/tokenfit)](https://pepy.tech/project/tokenfit)
64
+ [![Downloads/month](https://img.shields.io/pypi/dm/tokenfit)](https://pypi.org/project/tokenfit/)
65
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](./LICENSE)
66
+ [![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/)
67
+
68
+ ---
69
+
70
+ ## Why
71
+
72
+ GitHub Copilot moved to usage-based token billing (June 2026), pushing developers toward
73
+ cheap open-source models on HuggingFace. But free/small models have **tiny context
74
+ windows** — dump your whole repo at them and they choke or truncate.
75
+
76
+ Existing tools (`tiny-agents`, `AGENTS.md`, `SKILL.md`) inject context *raw*. tokenfit is
77
+ the missing **retrieval layer** that makes those models punch above their weight. It's a
78
+ pre-processor: it builds the optimal prompt, then hands it to your model or agent
79
+ framework — it does **not** trust a weak model to call a retrieval tool correctly.
80
+
81
+ ## How it works
82
+
83
+ ```
84
+ query
85
+
86
+
87
+ 1. INGEST load AGENTS.md / SKILL.md / docs / code → chunk
88
+ 2. INDEX embed chunks (BAAI/bge-small, local) → persist
89
+ 3. RETRIEVE cosine top-k semantic search
90
+ 4. BUDGET tokenizer-aware fit to N tokens + citations
91
+
92
+
93
+ optimal prompt → any HuggingFace model
94
+ ```
95
+
96
+ ## Does it actually beat just dumping the files? Yes.
97
+
98
+ We ran the free **Qwen2.5-Coder-7B** against [`psf/requests`](https://github.com/psf/requests)
99
+ — **~150,000 tokens** of code, ~19× bigger than an 8000-token budget — comparing two ways
100
+ of feeding the model, across 10 questions (`tokenfit eval --compare`):
101
+
102
+ - **Naive** — concatenate the files and truncate to 8000 tokens.
103
+ - **Retrieved** — let tokenfit pick the relevant ~2000 tokens.
104
+
105
+ | | Naive (8000 tok) | **tokenfit retrieved (~2000 tok)** |
106
+ |---|---|---|
107
+ | Wins (of 10) | 1 (1 tie) | **9** |
108
+ | Cites the right source file | rarely | **almost always** |
109
+ | Tokens per call | 8000 | **~2000 (≈4× cheaper)** |
110
+ | Failure modes | "context doesn't provide info", quoted the changelog, once **answered in Chinese**, once **invented a class that doesn't exist** | accurate, code-grounded answers |
111
+
112
+ **Why naive collapses:** the whole 8000-token budget filled up with `HISTORY.md` (the
113
+ changelog) and never reached a single source file. tokenfit semantically skips the noise
114
+ and fetches the right module — so it's **both more accurate _and_ ~4× cheaper per call.**
115
+
116
+ > 📂 Full side-by-side transcripts in **[EXAMPLES.md](./EXAMPLES.md)**.
117
+
118
+ ## Install
119
+
120
+ ```bash
121
+ pip install tokenfit
122
+ ```
123
+
124
+ Set a HuggingFace token with **"Make calls to Inference Providers"** permission:
125
+
126
+ ```bash
127
+ export HF_TOKEN=hf_your_token_here # bash
128
+ $env:HF_TOKEN = "hf_your_token_here" # PowerShell
129
+ ```
130
+
131
+ Verify it before you run anything:
132
+
133
+ ```bash
134
+ tokenfit auth # checks the token is set and valid
135
+ tokenfit auth --ping # also makes a 1-token call to confirm inference access
136
+ ```
137
+
138
+ ## Quickstart (CLI)
139
+
140
+ The fastest way — no Python required:
141
+
142
+ ```bash
143
+ # Ask a question: tokenfit retrieves the right context AND gets the model's answer
144
+ tokenfit ask "How does the auth flow work?" --repo ./my-project
145
+
146
+ # Just print the selected context (no model call, pipe it anywhere)
147
+ tokenfit context "auth flow" --repo ./my-project
148
+
149
+ # Pre-build / refresh the index for a repo
150
+ tokenfit index --repo ./my-project --rebuild
151
+ ```
152
+
153
+ Useful flags: `--budget 8000` (token budget), `--top-k 12` (chunks retrieved),
154
+ `--model Qwen/Qwen2.5-Coder-7B-Instruct` (any HF model), `--rebuild` (re-index).
155
+ Progress prints to stderr, so the answer/context on stdout stays clean for piping.
156
+
157
+ tokenfit indexes common source + doc file types out of the box (Python, JS/TS, Go,
158
+ Rust, Java, C#, C/C++, Ruby, PHP, Swift, GDScript, shell, plus md/yaml/toml/json…).
159
+ Indexing a different language? Add globs with `--include`:
160
+
161
+ ```bash
162
+ tokenfit ask "How does combat work?" --repo ./my-godot-game --include "*.gd" --rebuild
163
+ ```
164
+
165
+ 📂 **See [EXAMPLES.md](./EXAMPLES.md)** for real output — a free 7B model explaining a
166
+ Godot game's movement code, grounded in the actual source.
167
+
168
+ ## Quickstart (Python)
169
+
170
+ ```python
171
+ from tokenfit import pack
172
+ from tokenfit.models import TokenfitModel
173
+
174
+ # Select the best ~8k tokens of context for a question
175
+ context = pack.build(
176
+ query="How does the auth flow work?",
177
+ repo="./my-project",
178
+ budget=8000,
179
+ )
180
+
181
+ # Feed it to any small HF model
182
+ model = TokenfitModel(model="Qwen/Qwen2.5-Coder-7B-Instruct")
183
+ answer = model.chat(
184
+ system="You are a coding assistant for THIS project. Use only the provided context.",
185
+ user=f"{context}\n\nQUESTION: How does the auth flow work?",
186
+ )
187
+ print(answer)
188
+ ```
189
+
190
+ ## Validation harness
191
+
192
+ tokenfit ships with an eval harness that compares **naive truncation** vs **retrieved
193
+ context** on your own repo — the experiment that proves the approach is worth it:
194
+
195
+ ```bash
196
+ tokenfit eval --repo ./my-project --mode naive
197
+ tokenfit eval --repo ./my-project --mode retrieved
198
+ ```
199
+
200
+ Each run writes a comparison sheet for grading to `tokenfit/eval/results/`. Score the answers
201
+ 1–5 and compare. Edit `tokenfit/eval/dataset/questions.yaml` to fit your project.
202
+
203
+ ## Roadmap
204
+
205
+ - [x] **Phase 0** — eval harness + naive baseline
206
+ - [x] **Phase 1** — semantic retrieval (chunk → embed → retrieve → budget)
207
+ - [ ] **Phase 2** — hybrid BM25 + rerank + summarization for oversized chunks
208
+ - [ ] **Phase 3** — `tiny-agents` / `smolagents` adapters, optional Chroma backend
209
+
210
+ See [`idea.md`](./idea.md) for the rationale and [`plan.md`](./plan.md) for the full plan.
211
+
212
+ ## Development
213
+
214
+ ```bash
215
+ git clone https://github.com/shubham10divakar/tokenfit
216
+ cd tokenfit
217
+ pip install -e ".[dev]"
218
+ python -m tests.test_pipeline # fast regression test (no torch / network; numpy required)
219
+ ```
220
+
221
+ ## License
222
+
223
+ MIT — see [LICENSE](./LICENSE).
@@ -0,0 +1,171 @@
1
+ # tokenfit
2
+
3
+ > **Fit your whole repo into any small model's token window.**
4
+
5
+ `tokenfit` is a **context-selection pre-processor** for free / small LLMs. Point it at
6
+ your project's markdown + code, ask a question, and it returns the *most relevant* slice
7
+ of your codebase — packed to fit a tight token budget — so a 7B model with an 8k window
8
+ answers as if it read the whole repo.
9
+
10
+ [![PyPI version](https://img.shields.io/pypi/v/tokenfit)](https://pypi.org/project/tokenfit/)
11
+ [![Downloads](https://static.pepy.tech/badge/tokenfit)](https://pepy.tech/project/tokenfit)
12
+ [![Downloads/month](https://img.shields.io/pypi/dm/tokenfit)](https://pypi.org/project/tokenfit/)
13
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](./LICENSE)
14
+ [![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/)
15
+
16
+ ---
17
+
18
+ ## Why
19
+
20
+ GitHub Copilot moved to usage-based token billing (June 2026), pushing developers toward
21
+ cheap open-source models on HuggingFace. But free/small models have **tiny context
22
+ windows** — dump your whole repo at them and they choke or truncate.
23
+
24
+ Existing tools (`tiny-agents`, `AGENTS.md`, `SKILL.md`) inject context *raw*. tokenfit is
25
+ the missing **retrieval layer** that makes those models punch above their weight. It's a
26
+ pre-processor: it builds the optimal prompt, then hands it to your model or agent
27
+ framework — it does **not** trust a weak model to call a retrieval tool correctly.
28
+
29
+ ## How it works
30
+
31
+ ```
32
+ query
33
+
34
+
35
+ 1. INGEST load AGENTS.md / SKILL.md / docs / code → chunk
36
+ 2. INDEX embed chunks (BAAI/bge-small, local) → persist
37
+ 3. RETRIEVE cosine top-k semantic search
38
+ 4. BUDGET tokenizer-aware fit to N tokens + citations
39
+
40
+
41
+ optimal prompt → any HuggingFace model
42
+ ```
43
+
44
+ ## Does it actually beat just dumping the files? Yes.
45
+
46
+ We ran the free **Qwen2.5-Coder-7B** against [`psf/requests`](https://github.com/psf/requests)
47
+ — **~150,000 tokens** of code, ~19× bigger than an 8000-token budget — comparing two ways
48
+ of feeding the model, across 10 questions (`tokenfit eval --compare`):
49
+
50
+ - **Naive** — concatenate the files and truncate to 8000 tokens.
51
+ - **Retrieved** — let tokenfit pick the relevant ~2000 tokens.
52
+
53
+ | | Naive (8000 tok) | **tokenfit retrieved (~2000 tok)** |
54
+ |---|---|---|
55
+ | Wins (of 10) | 1 (1 tie) | **9** |
56
+ | Cites the right source file | rarely | **almost always** |
57
+ | Tokens per call | 8000 | **~2000 (≈4× cheaper)** |
58
+ | Failure modes | "context doesn't provide info", quoted the changelog, once **answered in Chinese**, once **invented a class that doesn't exist** | accurate, code-grounded answers |
59
+
60
+ **Why naive collapses:** the whole 8000-token budget filled up with `HISTORY.md` (the
61
+ changelog) and never reached a single source file. tokenfit semantically skips the noise
62
+ and fetches the right module — so it's **both more accurate _and_ ~4× cheaper per call.**
63
+
64
+ > 📂 Full side-by-side transcripts in **[EXAMPLES.md](./EXAMPLES.md)**.
65
+
66
+ ## Install
67
+
68
+ ```bash
69
+ pip install tokenfit
70
+ ```
71
+
72
+ Set a HuggingFace token with **"Make calls to Inference Providers"** permission:
73
+
74
+ ```bash
75
+ export HF_TOKEN=hf_your_token_here # bash
76
+ $env:HF_TOKEN = "hf_your_token_here" # PowerShell
77
+ ```
78
+
79
+ Verify it before you run anything:
80
+
81
+ ```bash
82
+ tokenfit auth # checks the token is set and valid
83
+ tokenfit auth --ping # also makes a 1-token call to confirm inference access
84
+ ```
85
+
86
+ ## Quickstart (CLI)
87
+
88
+ The fastest way — no Python required:
89
+
90
+ ```bash
91
+ # Ask a question: tokenfit retrieves the right context AND gets the model's answer
92
+ tokenfit ask "How does the auth flow work?" --repo ./my-project
93
+
94
+ # Just print the selected context (no model call, pipe it anywhere)
95
+ tokenfit context "auth flow" --repo ./my-project
96
+
97
+ # Pre-build / refresh the index for a repo
98
+ tokenfit index --repo ./my-project --rebuild
99
+ ```
100
+
101
+ Useful flags: `--budget 8000` (token budget), `--top-k 12` (chunks retrieved),
102
+ `--model Qwen/Qwen2.5-Coder-7B-Instruct` (any HF model), `--rebuild` (re-index).
103
+ Progress prints to stderr, so the answer/context on stdout stays clean for piping.
104
+
105
+ tokenfit indexes common source + doc file types out of the box (Python, JS/TS, Go,
106
+ Rust, Java, C#, C/C++, Ruby, PHP, Swift, GDScript, shell, plus md/yaml/toml/json…).
107
+ Indexing a different language? Add globs with `--include`:
108
+
109
+ ```bash
110
+ tokenfit ask "How does combat work?" --repo ./my-godot-game --include "*.gd" --rebuild
111
+ ```
112
+
113
+ 📂 **See [EXAMPLES.md](./EXAMPLES.md)** for real output — a free 7B model explaining a
114
+ Godot game's movement code, grounded in the actual source.
115
+
116
+ ## Quickstart (Python)
117
+
118
+ ```python
119
+ from tokenfit import pack
120
+ from tokenfit.models import TokenfitModel
121
+
122
+ # Select the best ~8k tokens of context for a question
123
+ context = pack.build(
124
+ query="How does the auth flow work?",
125
+ repo="./my-project",
126
+ budget=8000,
127
+ )
128
+
129
+ # Feed it to any small HF model
130
+ model = TokenfitModel(model="Qwen/Qwen2.5-Coder-7B-Instruct")
131
+ answer = model.chat(
132
+ system="You are a coding assistant for THIS project. Use only the provided context.",
133
+ user=f"{context}\n\nQUESTION: How does the auth flow work?",
134
+ )
135
+ print(answer)
136
+ ```
137
+
138
+ ## Validation harness
139
+
140
+ tokenfit ships with an eval harness that compares **naive truncation** vs **retrieved
141
+ context** on your own repo — the experiment that proves the approach is worth it:
142
+
143
+ ```bash
144
+ tokenfit eval --repo ./my-project --mode naive
145
+ tokenfit eval --repo ./my-project --mode retrieved
146
+ ```
147
+
148
+ Each run writes a comparison sheet for grading to `tokenfit/eval/results/`. Score the answers
149
+ 1–5 and compare. Edit `tokenfit/eval/dataset/questions.yaml` to fit your project.
150
+
151
+ ## Roadmap
152
+
153
+ - [x] **Phase 0** — eval harness + naive baseline
154
+ - [x] **Phase 1** — semantic retrieval (chunk → embed → retrieve → budget)
155
+ - [ ] **Phase 2** — hybrid BM25 + rerank + summarization for oversized chunks
156
+ - [ ] **Phase 3** — `tiny-agents` / `smolagents` adapters, optional Chroma backend
157
+
158
+ See [`idea.md`](./idea.md) for the rationale and [`plan.md`](./plan.md) for the full plan.
159
+
160
+ ## Development
161
+
162
+ ```bash
163
+ git clone https://github.com/shubham10divakar/tokenfit
164
+ cd tokenfit
165
+ pip install -e ".[dev]"
166
+ python -m tests.test_pipeline # fast regression test (no torch / network; numpy required)
167
+ ```
168
+
169
+ ## License
170
+
171
+ MIT — see [LICENSE](./LICENSE).
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,66 @@
1
+ """Packaging for tokenfit.
2
+
3
+ Build: python -m build (produces wheel + sdist in dist/)
4
+ Install: pip install . (or `pip install -e .` for development)
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ from setuptools import find_packages, setup
10
+
11
# Directory holding this setup script; the README beside it is the long description.
ROOT = Path(__file__).parent
LONG_DESCRIPTION = ROOT.joinpath("README.md").read_text(encoding="utf-8")

# One-line summary shown on the package index.
_SUMMARY = (
    "Fit your whole repo into any small model's token window — "
    "context selection for free/small LLMs."
)

# Links rendered in the project sidebar.
_PROJECT_URLS = {
    "Source": "https://github.com/shubham10divakar/tokenfit",
    "Issues": "https://github.com/shubham10divakar/tokenfit/issues",
    "Examples": "https://github.com/shubham10divakar/tokenfit/blob/main/EXAMPLES.md",
}

# Always-installed runtime requirements.
_INSTALL_REQUIRES = [
    "huggingface_hub>=0.25.0",
    "transformers>=4.44.0",
    "sentence-transformers>=3.0.0",
    "numpy>=1.24.0",
    "pyyaml>=6.0",
]

# Optional extras: `pip install tokenfit[hybrid]`, `[chroma]`, `[dev]`.
_EXTRAS_REQUIRE = {
    # Phase 2 features (hybrid retrieval / scalable store)
    "hybrid": ["rank-bm25>=0.2.2"],
    "chroma": ["chromadb>=0.5.0"],
    "dev": ["pytest>=8.0", "build>=1.2"],
}

# Console commands created at install time, as `name=module:function`.
_CONSOLE_SCRIPTS = [
    "tokenfit=tokenfit.cli:main",
    "tokenfit-eval=tokenfit.eval.harness:main",
]

_KEYWORDS = ["llm", "rag", "context", "huggingface", "coding-agent", "retrieval"]

_CLASSIFIERS = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]

setup(
    # --- identity ---
    name="tokenfit",
    version="1.0.0",
    description=_SUMMARY,
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    author="Shubham Divakar",
    author_email="shubham.divakar@gmail.com",
    url="https://github.com/shubham10divakar/tokenfit",
    project_urls=_PROJECT_URLS,
    license="MIT",
    keywords=_KEYWORDS,
    classifiers=_CLASSIFIERS,
    # --- contents ---
    packages=find_packages(include=["tokenfit", "tokenfit.*"]),
    include_package_data=True,
    package_data={"tokenfit": ["eval/dataset/*.yaml"]},
    # --- requirements ---
    python_requires=">=3.9",
    install_requires=_INSTALL_REQUIRES,
    extras_require=_EXTRAS_REQUIRE,
    # --- command-line entry points ---
    entry_points={"console_scripts": _CONSOLE_SCRIPTS},
)
@@ -0,0 +1,63 @@
1
+ """Regression test for the Phase 1 pipeline that avoids the heavy ML dependencies.
2
+
3
+ Uses a fake keyword-based embedder so it runs without torch / sentence-transformers /
4
+ network, yet still exercises chunk -> index -> retrieve -> budget end to end.
5
+
6
+ Run: python -m tests.test_pipeline (or: pytest tests/)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import tempfile
12
+
13
+ import numpy as np
14
+
15
+ from tokenfit import budget, index, retrieve
16
+ from tokenfit.ingest import Document, chunk_documents
17
+
18
+ _VOCAB = ["auth", "login", "token", "database", "persist", "config", "test", "endpoint"]
19
+
20
+
21
def _fake_embed(texts, model=index.EMBED_MODEL):
    """Embed texts as keyword-count vectors (test stand-in for the real embedder).

    Each text becomes one float32 row counting how often every `_VOCAB` word
    occurs in the lower-cased text. Rows with at least one hit are scaled to
    unit L2 norm; rows with no hits stay all-zero.

    Args:
        texts: iterable of strings to embed.
        model: accepted but unused here (presumably mirrors the real
            embedder's signature — confirm against `tokenfit.index`).

    Returns:
        A float32 array with one row per text and `len(_VOCAB)` columns. Empty
        input yields an array of shape `(0, len(_VOCAB))` rather than raising.
    """
    rows = []
    for text in texts:
        lowered = text.lower()
        counts = np.array([lowered.count(word) for word in _VOCAB], dtype=np.float32)
        norm = np.linalg.norm(counts)
        # Skip the division for all-zero rows to avoid dividing by zero.
        rows.append(counts / norm if norm else counts)
    if not rows:
        # np.vstack raises ValueError on an empty sequence.
        return np.zeros((0, len(_VOCAB)), dtype=np.float32)
    return np.vstack(rows).astype(np.float32)
29
+
30
+
31
class _FakeModel:
    """Minimal model stub exposing only the token counter the budgeter calls."""

    def count_tokens(self, text: str) -> int:
        """Estimate tokens as one per four characters, never fewer than one."""
        estimate = len(text) // 4
        return estimate if estimate > 1 else 1
34
+
35
+
36
def test_pipeline(monkeypatch=None):
    """Run chunk -> index -> retrieve -> budget end to end with a fake embedder.

    Args:
        monkeypatch: pytest's `monkeypatch` fixture when collected by pytest;
            `None` when the module is run directly. With the fixture, pytest
            undoes the patch at teardown; without it, the previous
            `embed_texts` attributes are restored before returning, so the
            fake embedder never leaks into other tests in the same process.
    """
    patched_modules = (index, retrieve)
    absent = object()  # marks a module that had no `embed_texts` attribute
    undo = []

    # patch the embedder in both modules that reference it
    if monkeypatch is not None:
        for module in patched_modules:
            # raising=False keeps the original "assign unconditionally" semantics
            monkeypatch.setattr(module, "embed_texts", _fake_embed, raising=False)
    else:
        for module in patched_modules:
            undo.append((module, getattr(module, "embed_texts", absent)))
            module.embed_texts = _fake_embed

    try:
        docs = [
            Document("auth.py", "def login(user):\n # validate auth token\n return token"),
            Document("db.py", "def save(rec):\n # persist to database\n database.write(rec)"),
            Document("conf.py", "CONFIG = {}\n# load config values here"),
        ]
        chunks = chunk_documents(docs, target_chars=400)
        assert len(chunks) == 3

        with tempfile.TemporaryDirectory() as d:
            index.build_index(chunks, d)
            assert index.index_exists(d)

            hits = retrieve.retrieve("how does login auth token work", d, top_k=3)
            assert hits[0].doc_path == "auth.py"  # semantic ranking works

            packed = budget.fit_to_budget(hits, _FakeModel(), budget=40)
            assert _FakeModel().count_tokens(packed) <= 40  # budget respected
            assert "### FILE: auth.py@" in packed  # citations present
    finally:
        # Manual-patch path only: put back whatever was there before.
        for module, previous in undo:
            if previous is absent:
                delattr(module, "embed_texts")
            else:
                module.embed_texts = previous
59
+
60
+
61
+ if __name__ == "__main__":
62
+ test_pipeline()
63
+ print("PASSED")
@@ -0,0 +1,9 @@
1
+ """tokenfit — context-selection pre-processor for free/small LLMs.
2
+
3
+ Given a developer query + a project corpus (md files, code, vector DB), build the
4
+ optimal token-budgeted context to feed a small HuggingFace model.
5
+
6
+ See plan.md for the phased roadmap.
7
+ """
8
+
9
+ __version__ = "1.0.0"
@@ -0,0 +1,34 @@
1
+ """Budgeting — pack the highest-value chunks into a token window. (Phase 1)
2
+
3
+ Greedy fit by relevance order (chunks arrive already ranked). Phase 2 adds
4
+ summarization of oversized chunks instead of dropping them.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ from tokenfit.ingest import Chunk
12
+
13
+ if TYPE_CHECKING: # only for type hints; avoids pulling the inference SDK
14
+ from tokenfit.models import TokenfitModel
15
+
16
# Layout of one packed chunk: a citation header line, then the chunk text.
_HEADER = "### FILE: {label}\n{text}"


def fit_to_budget(chunks: list[Chunk], model: "TokenfitModel", budget: int) -> str:
    """Greedily pack ranked chunks into at most `budget` tokens.

    Chunks are visited in the order given. Each is rendered with a
    `### FILE: <label>` header so the model can cite its source, and priced
    with `model.count_tokens`. A chunk too large for the remaining budget is
    passed over, so a smaller chunk later in the list can still be included.

    Args:
        chunks: candidate chunks, each providing `label` and `text`.
        model: object exposing `count_tokens(str) -> int`.
        budget: maximum total token cost of the packed result.

    Returns:
        The accepted blocks joined by blank lines; `""` when nothing fits.
    """
    remaining = budget
    accepted: list[str] = []
    for chunk in chunks:
        rendered = _HEADER.format(label=chunk.label, text=chunk.text)
        # Two extra tokens cover the blank-line separator between blocks.
        needed = 2 + model.count_tokens(rendered)
        if needed <= remaining:
            accepted.append(rendered)
            remaining -= needed
    return "\n\n".join(accepted)