thinkpack 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thinkpack-0.0.2/LICENSE +21 -0
- thinkpack-0.0.2/PKG-INFO +268 -0
- thinkpack-0.0.2/README.md +229 -0
- thinkpack-0.0.2/pyproject.toml +48 -0
- thinkpack-0.0.2/setup.cfg +4 -0
- thinkpack-0.0.2/src/thinkpack/__init__.py +29 -0
- thinkpack-0.0.2/src/thinkpack/_model.py +124 -0
- thinkpack-0.0.2/src/thinkpack/_tags.py +16 -0
- thinkpack-0.0.2/src/thinkpack/distill.py +166 -0
- thinkpack-0.0.2/src/thinkpack/hybrid.py +135 -0
- thinkpack-0.0.2/src/thinkpack/mask.py +257 -0
- thinkpack-0.0.2/src/thinkpack/parse.py +173 -0
- thinkpack-0.0.2/src/thinkpack/steer.py +138 -0
- thinkpack-0.0.2/src/thinkpack.egg-info/PKG-INFO +268 -0
- thinkpack-0.0.2/src/thinkpack.egg-info/SOURCES.txt +20 -0
- thinkpack-0.0.2/src/thinkpack.egg-info/dependency_links.txt +1 -0
- thinkpack-0.0.2/src/thinkpack.egg-info/requires.txt +2 -0
- thinkpack-0.0.2/src/thinkpack.egg-info/top_level.txt +1 -0
- thinkpack-0.0.2/tests/test_distill.py +214 -0
- thinkpack-0.0.2/tests/test_hybrid.py +163 -0
- thinkpack-0.0.2/tests/test_parse.py +125 -0
- thinkpack-0.0.2/tests/test_steer.py +244 -0
thinkpack-0.0.2/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Luke Twist
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
thinkpack-0.0.2/PKG-INFO
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: thinkpack
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Tools for preventing think collapse in reasoning language models.
|
|
5
|
+
Author-email: Lukas Twist <itsluketwist@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2023 Luke Twist
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/itsluketwist/thinkpack
|
|
29
|
+
Keywords: thinkpack,llm,reasoning,think-collapse,fine-tuning
|
|
30
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
31
|
+
Classifier: Programming Language :: Python
|
|
32
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
|
+
Requires-Python: >=3.11
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
License-File: LICENSE
|
|
36
|
+
Requires-Dist: datasets
|
|
37
|
+
Requires-Dist: transformers
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# ThinkPack
|
|
41
|
+
|
|
42
|
+

|
|
43
|
+
|
|
44
|
+
A lightweight toolkit for working with reasoning blocks in language models — preventing think collapse via loss masking, steering reasoning at inference time, and parsing model outputs.
|
|
45
|
+
|
|
46
|
+
**Think collapse** is a failure mode where reasoning models stop using their `<think>...</think>` blocks during or after fine-tuning.
|
|
47
|
+
Without intervention, the model learns to skip reasoning entirely — producing answers directly and losing the chain-of-thought behaviour it was trained on.
|
|
48
|
+
ThinkPack provides three targeted tools to prevent this:
|
|
49
|
+
|
|
50
|
+
- **Loss masking** (`thinkpack.mask`) — keeps reasoning blocks in the training context while masking them from the loss, so the model doesn't learn to skip them.
|
|
51
|
+
- **Thought steering** (`thinkpack.steer`) — injects a short primer after the opening reasoning tag at inference time, nudging the model to reason before answering.
|
|
52
|
+
- **Response parsing** (`thinkpack.parse`) — splits raw model output into reasoning and answer components, with flags for truncation detection.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Installation
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install thinkpack
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Modules
|
|
65
|
+
|
|
66
|
+
### `thinkpack.mask` — Training-time loss masking
|
|
67
|
+
|
|
68
|
+
When fine-tuning a reasoning model, naively training on all tokens can cause the model to learn to skip its reasoning block entirely. `mask()` formats your training records into a pretokenized HuggingFace dataset with selected parts of the sequence excluded from the loss.
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import thinkpack
|
|
72
|
+
|
|
73
|
+
dataset = thinkpack.mask(
|
|
74
|
+
records=records, # list of dicts with "instruction" and "response" keys
|
|
75
|
+
tokenizer=tokenizer,
|
|
76
|
+
masked=thinkpack.Mask.THINK, # mask only the think block (default)
|
|
77
|
+
)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
The `masked` parameter is a composable flag — combine sections with `|`:
|
|
81
|
+
|
|
82
|
+
| Value | Effect |
|
|
83
|
+
|---|---|
|
|
84
|
+
| `Mask.THINK` | Think block hidden from loss; model trains on prompt + response |
|
|
85
|
+
| `Mask.PROMPT \| Mask.THINK` | Train on response only |
|
|
86
|
+
| `None` | No masking; all tokens contribute to the loss |
|
|
87
|
+
|
|
88
|
+
Model-specific template handling (Qwen3's native `reasoning_content` field, OLMo-3's auto-injected opening tag) is detected automatically from the tokenizer — no manual configuration needed.
|
|
89
|
+
|
|
90
|
+
See [examples/training.py](examples/training.py) for a complete training loop.
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
### `thinkpack.steer` — Inference-time thought steering
|
|
95
|
+
|
|
96
|
+
Think collapse can also be addressed at inference time by injecting a short prefix after the opening reasoning tag, seeding the model's reasoning before it generates its own thought content.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
# ensure the opening reasoning tag is present without seeding the thought
|
|
100
|
+
steered_prompts = thinkpack.steer(
|
|
101
|
+
prompts=templated_prompts, # already chat-templated strings
|
|
102
|
+
tokenizer=tokenizer,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# seed the model's thought with a preset
|
|
106
|
+
steered_prompts = thinkpack.steer(
|
|
107
|
+
prompts=templated_prompts,
|
|
108
|
+
tokenizer=tokenizer,
|
|
109
|
+
prefix=thinkpack.SimplePrefix.CONCISE,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# or pass any custom string
|
|
113
|
+
steered_prompts = thinkpack.steer(
|
|
114
|
+
prompts=templated_prompts,
|
|
115
|
+
tokenizer=tokenizer,
|
|
116
|
+
prefix="Okay, this is a tricky one. Let me consider each part carefully.",
|
|
117
|
+
)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
`SimplePrefix` provides a few basic presets:
|
|
121
|
+
|
|
122
|
+
| Preset | Text |
|
|
123
|
+
|---|---|
|
|
124
|
+
| `BRIEF` | `"Okay, "` |
|
|
125
|
+
| `STEPS` | `"Okay, let me think this through step by step."` |
|
|
126
|
+
| `CONCISE` | `"Okay, let me think this through, but I need to be concise and make sure I also provide an answer."` |
|
|
127
|
+
|
|
128
|
+
`steer()` handles the PREFIXED template quirk automatically: models like OLMo-3 whose chat template already ends with an opening reasoning tag do not get a duplicate tag injected.
|
|
129
|
+
|
|
130
|
+
See [examples/inference.py](examples/inference.py) for a complete inference loop.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
### `thinkpack.parse` — Response parsing
|
|
135
|
+
|
|
136
|
+
Parse raw model outputs into structured components — useful for evaluation, analysis, and hybrid decoding pipelines.
|
|
137
|
+
|
|
138
|
+
```python
|
|
139
|
+
# single response
|
|
140
|
+
parsed = thinkpack.parse(response=raw_text)
|
|
141
|
+
parsed.answer # str — text after the closing reasoning tag
|
|
142
|
+
parsed.reasoning # str — content of the reasoning block
|
|
143
|
+
parsed.has_valid_reasoning # bool — non-empty, completed reasoning block
|
|
144
|
+
parsed.has_truncated_reasoning # bool — reasoning block started but never closed
|
|
145
|
+
|
|
146
|
+
# directly from vLLM output objects (single output → list, list of outputs → list[list])
|
|
147
|
+
parsed = thinkpack.parse_output(output=outputs)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Handles all four output formats:
|
|
151
|
+
|
|
152
|
+
| Format | Example |
|
|
153
|
+
|---|---|
|
|
154
|
+
| Standard | `<think>reasoning</think>answer` |
|
|
155
|
+
| Prefixed template | `reasoning</think>answer` (opening tag injected by template) |
|
|
156
|
+
| Truncated standard | `<think>reasoning...` (no closing tag) |
|
|
157
|
+
| Truncated prefixed | `reasoning...` (pass `prefixed=True`) |
|
|
158
|
+
|
|
159
|
+
Recognises tag variants: `think`, `thinking`, `reasoning`, `thought` (case-insensitive).
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
### `thinkpack.distill` — Distillation prompt building and reasoning extraction
|
|
164
|
+
|
|
165
|
+
When training data lacks reasoning traces, `distill` helps construct them. It builds prompts that ask a teacher model to produce a reasoning trace given a question and its known answer, then extracts and writes those traces back into your records.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
import thinkpack
|
|
169
|
+
|
|
170
|
+
# build prompts for a teacher model to generate reasoning traces
|
|
171
|
+
prompts = thinkpack.build_prompts(
|
|
172
|
+
records=records, # list of dicts with "instruction" and "response" keys
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
# after generating responses from the teacher model, extract the traces
|
|
176
|
+
traces = thinkpack.extract_reasoning(text=responses, tag="reasoning_trace")
|
|
177
|
+
|
|
178
|
+
# or write traces back into records in one step
|
|
179
|
+
records = thinkpack.update_records(
|
|
180
|
+
records=records,
|
|
181
|
+
responses=responses,
|
|
182
|
+
field="reasoning", # key to write extracted traces into
|
|
183
|
+
)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
`extract_reasoning` accepts a single string or a list, and returns `None` where extraction fails (blank or no tag found):
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
# single response — returns str | None
|
|
190
|
+
trace = thinkpack.extract_reasoning(text=response)
|
|
191
|
+
|
|
192
|
+
# list of responses — returns list[str | None]
|
|
193
|
+
traces = thinkpack.extract_reasoning(text=responses)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
### `thinkpack.hybrid` — Hybrid decoding
|
|
199
|
+
|
|
200
|
+
Hybrid decoding separates reasoning from answering across two model variants: the base model generates the reasoning block freely (without fine-tuning influence), and the fine-tuned adapter generates the final answer conditioned on that reasoning. This can improve answer quality when the adapter has partially collapsed.
|
|
201
|
+
|
|
202
|
+
Requires vLLM with `enable_lora=True`.
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
from thinkpack import hybrid_generate, SimplePrefix
|
|
206
|
+
|
|
207
|
+
# steered_prompts = prompts already ending with an open reasoning tag (from steer())
|
|
208
|
+
results = thinkpack.hybrid_generate(
|
|
209
|
+
prompts=steered_prompts,
|
|
210
|
+
llm=llm, # vLLM LLM loaded with enable_lora=True
|
|
211
|
+
lora_request=lora_request, # adapter used for phase 2
|
|
212
|
+
sampling_params=sampling_params,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
for r in results:
|
|
216
|
+
r.reasoning # str — reasoning produced by the base model
|
|
217
|
+
r.answer # str — answer produced by the fine-tuned model
|
|
218
|
+
r.raw # str — full combined string for convenience
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## *development*
|
|
224
|
+
|
|
225
|
+
Clone the repository code:
|
|
226
|
+
|
|
227
|
+
```shell
|
|
228
|
+
git clone https://github.com/itsluketwist/thinkpack.git
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
We use [`uv`](https://astral.sh/blog/uv) for project management.
|
|
232
|
+
Once cloned, create a virtual environment and install the project with dev dependencies:
|
|
233
|
+
|
|
234
|
+
```shell
|
|
235
|
+
python -m venv .venv
|
|
236
|
+
|
|
237
|
+
. .venv/bin/activate
|
|
238
|
+
|
|
239
|
+
pip install uv
|
|
240
|
+
|
|
241
|
+
uv sync
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Use `make` commands to lint and test:
|
|
245
|
+
|
|
246
|
+
```shell
|
|
247
|
+
make lint
|
|
248
|
+
|
|
249
|
+
make test
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Use `uv` to add new dependencies into the project:
|
|
253
|
+
|
|
254
|
+
```shell
|
|
255
|
+
uv add transformers
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Or to upgrade dependencies:
|
|
259
|
+
|
|
260
|
+
```shell
|
|
261
|
+
uv sync --upgrade
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
Check typings with `ty`:
|
|
265
|
+
|
|
266
|
+
```shell
|
|
267
|
+
uv run --extra dev ty check src tests
|
|
268
|
+
```
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# ThinkPack
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
A lightweight toolkit for working with reasoning blocks in language models — preventing think collapse via loss masking, steering reasoning at inference time, and parsing model outputs.
|
|
6
|
+
|
|
7
|
+
**Think collapse** is a failure mode where reasoning models stop using their `<think>...</think>` blocks during or after fine-tuning.
|
|
8
|
+
Without intervention, the model learns to skip reasoning entirely — producing answers directly and losing the chain-of-thought behaviour it was trained on.
|
|
9
|
+
ThinkPack provides three targeted tools to prevent this:
|
|
10
|
+
|
|
11
|
+
- **Loss masking** (`thinkpack.mask`) — keeps reasoning blocks in the training context while masking them from the loss, so the model doesn't learn to skip them.
|
|
12
|
+
- **Thought steering** (`thinkpack.steer`) — injects a short primer after the opening reasoning tag at inference time, nudging the model to reason before answering.
|
|
13
|
+
- **Response parsing** (`thinkpack.parse`) — splits raw model output into reasoning and answer components, with flags for truncation detection.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install thinkpack
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Modules
|
|
26
|
+
|
|
27
|
+
### `thinkpack.mask` — Training-time loss masking
|
|
28
|
+
|
|
29
|
+
When fine-tuning a reasoning model, naively training on all tokens can cause the model to learn to skip its reasoning block entirely. `mask()` formats your training records into a pretokenized HuggingFace dataset with selected parts of the sequence excluded from the loss.
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import thinkpack
|
|
33
|
+
|
|
34
|
+
dataset = thinkpack.mask(
|
|
35
|
+
records=records, # list of dicts with "instruction" and "response" keys
|
|
36
|
+
tokenizer=tokenizer,
|
|
37
|
+
masked=thinkpack.Mask.THINK, # mask only the think block (default)
|
|
38
|
+
)
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
The `masked` parameter is a composable flag — combine sections with `|`:
|
|
42
|
+
|
|
43
|
+
| Value | Effect |
|
|
44
|
+
|---|---|
|
|
45
|
+
| `Mask.THINK` | Think block hidden from loss; model trains on prompt + response |
|
|
46
|
+
| `Mask.PROMPT \| Mask.THINK` | Train on response only |
|
|
47
|
+
| `None` | No masking; all tokens contribute to the loss |
|
|
48
|
+
|
|
49
|
+
Model-specific template handling (Qwen3's native `reasoning_content` field, OLMo-3's auto-injected opening tag) is detected automatically from the tokenizer — no manual configuration needed.
|
|
50
|
+
|
|
51
|
+
See [examples/training.py](examples/training.py) for a complete training loop.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
### `thinkpack.steer` — Inference-time thought steering
|
|
56
|
+
|
|
57
|
+
Think collapse can also be addressed at inference time by injecting a short prefix after the opening reasoning tag, seeding the model's reasoning before it generates its own thought content.
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
# ensure the opening reasoning tag is present without seeding the thought
|
|
61
|
+
steered_prompts = thinkpack.steer(
|
|
62
|
+
prompts=templated_prompts, # already chat-templated strings
|
|
63
|
+
tokenizer=tokenizer,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
# seed the model's thought with a preset
|
|
67
|
+
steered_prompts = thinkpack.steer(
|
|
68
|
+
prompts=templated_prompts,
|
|
69
|
+
tokenizer=tokenizer,
|
|
70
|
+
prefix=thinkpack.SimplePrefix.CONCISE,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# or pass any custom string
|
|
74
|
+
steered_prompts = thinkpack.steer(
|
|
75
|
+
prompts=templated_prompts,
|
|
76
|
+
tokenizer=tokenizer,
|
|
77
|
+
prefix="Okay, this is a tricky one. Let me consider each part carefully.",
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
`SimplePrefix` provides a few basic presets:
|
|
82
|
+
|
|
83
|
+
| Preset | Text |
|
|
84
|
+
|---|---|
|
|
85
|
+
| `BRIEF` | `"Okay, "` |
|
|
86
|
+
| `STEPS` | `"Okay, let me think this through step by step."` |
|
|
87
|
+
| `CONCISE` | `"Okay, let me think this through, but I need to be concise and make sure I also provide an answer."` |
|
|
88
|
+
|
|
89
|
+
`steer()` handles the PREFIXED template quirk automatically: models like OLMo-3 whose chat template already ends with an opening reasoning tag do not get a duplicate tag injected.
|
|
90
|
+
|
|
91
|
+
See [examples/inference.py](examples/inference.py) for a complete inference loop.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
### `thinkpack.parse` — Response parsing
|
|
96
|
+
|
|
97
|
+
Parse raw model outputs into structured components — useful for evaluation, analysis, and hybrid decoding pipelines.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
# single response
|
|
101
|
+
parsed = thinkpack.parse(response=raw_text)
|
|
102
|
+
parsed.answer # str — text after the closing reasoning tag
|
|
103
|
+
parsed.reasoning # str — content of the reasoning block
|
|
104
|
+
parsed.has_valid_reasoning # bool — non-empty, completed reasoning block
|
|
105
|
+
parsed.has_truncated_reasoning # bool — reasoning block started but never closed
|
|
106
|
+
|
|
107
|
+
# directly from vLLM output objects (single output → list, list of outputs → list[list])
|
|
108
|
+
parsed = thinkpack.parse_output(output=outputs)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Handles all four output formats:
|
|
112
|
+
|
|
113
|
+
| Format | Example |
|
|
114
|
+
|---|---|
|
|
115
|
+
| Standard | `<think>reasoning</think>answer` |
|
|
116
|
+
| Prefixed template | `reasoning</think>answer` (opening tag injected by template) |
|
|
117
|
+
| Truncated standard | `<think>reasoning...` (no closing tag) |
|
|
118
|
+
| Truncated prefixed | `reasoning...` (pass `prefixed=True`) |
|
|
119
|
+
|
|
120
|
+
Recognises tag variants: `think`, `thinking`, `reasoning`, `thought` (case-insensitive).
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
### `thinkpack.distill` — Distillation prompt building and reasoning extraction
|
|
125
|
+
|
|
126
|
+
When training data lacks reasoning traces, `distill` helps construct them. It builds prompts that ask a teacher model to produce a reasoning trace given a question and its known answer, then extracts and writes those traces back into your records.
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import thinkpack
|
|
130
|
+
|
|
131
|
+
# build prompts for a teacher model to generate reasoning traces
|
|
132
|
+
prompts = thinkpack.build_prompts(
|
|
133
|
+
records=records, # list of dicts with "instruction" and "response" keys
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
# after generating responses from the teacher model, extract the traces
|
|
137
|
+
traces = thinkpack.extract_reasoning(text=responses, tag="reasoning_trace")
|
|
138
|
+
|
|
139
|
+
# or write traces back into records in one step
|
|
140
|
+
records = thinkpack.update_records(
|
|
141
|
+
records=records,
|
|
142
|
+
responses=responses,
|
|
143
|
+
field="reasoning", # key to write extracted traces into
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
`extract_reasoning` accepts a single string or a list, and returns `None` where extraction fails (blank or no tag found):
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# single response — returns str | None
|
|
151
|
+
trace = thinkpack.extract_reasoning(text=response)
|
|
152
|
+
|
|
153
|
+
# list of responses — returns list[str | None]
|
|
154
|
+
traces = thinkpack.extract_reasoning(text=responses)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
### `thinkpack.hybrid` — Hybrid decoding
|
|
160
|
+
|
|
161
|
+
Hybrid decoding separates reasoning from answering across two model variants: the base model generates the reasoning block freely (without fine-tuning influence), and the fine-tuned adapter generates the final answer conditioned on that reasoning. This can improve answer quality when the adapter has partially collapsed.
|
|
162
|
+
|
|
163
|
+
Requires vLLM with `enable_lora=True`.
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from thinkpack import hybrid_generate, SimplePrefix
|
|
167
|
+
|
|
168
|
+
# steered_prompts = prompts already ending with an open reasoning tag (from steer())
|
|
169
|
+
results = thinkpack.hybrid_generate(
|
|
170
|
+
prompts=steered_prompts,
|
|
171
|
+
llm=llm, # vLLM LLM loaded with enable_lora=True
|
|
172
|
+
lora_request=lora_request, # adapter used for phase 2
|
|
173
|
+
sampling_params=sampling_params,
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
for r in results:
|
|
177
|
+
r.reasoning # str — reasoning produced by the base model
|
|
178
|
+
r.answer # str — answer produced by the fine-tuned model
|
|
179
|
+
r.raw # str — full combined string for convenience
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## *development*
|
|
185
|
+
|
|
186
|
+
Clone the repository code:
|
|
187
|
+
|
|
188
|
+
```shell
|
|
189
|
+
git clone https://github.com/itsluketwist/thinkpack.git
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
We use [`uv`](https://astral.sh/blog/uv) for project management.
|
|
193
|
+
Once cloned, create a virtual environment and install the project with dev dependencies:
|
|
194
|
+
|
|
195
|
+
```shell
|
|
196
|
+
python -m venv .venv
|
|
197
|
+
|
|
198
|
+
. .venv/bin/activate
|
|
199
|
+
|
|
200
|
+
pip install uv
|
|
201
|
+
|
|
202
|
+
uv sync
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Use `make` commands to lint and test:
|
|
206
|
+
|
|
207
|
+
```shell
|
|
208
|
+
make lint
|
|
209
|
+
|
|
210
|
+
make test
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
Use `uv` to add new dependencies into the project:
|
|
214
|
+
|
|
215
|
+
```shell
|
|
216
|
+
uv add transformers
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Or to upgrade dependencies:
|
|
220
|
+
|
|
221
|
+
```shell
|
|
222
|
+
uv sync --upgrade
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
Check typings with `ty`:
|
|
226
|
+
|
|
227
|
+
```shell
|
|
228
|
+
uv run --extra dev ty check src tests
|
|
229
|
+
```
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Package specification, as defined here:
|
|
2
|
+
# https://packaging.python.org/en/latest/specifications/pyproject-toml/#pyproject-toml-spec
|
|
3
|
+
|
|
4
|
+
[build-system]
|
|
5
|
+
requires = ["setuptools", "setuptools-git-versioning<2"]
|
|
6
|
+
build-backend = "setuptools.build_meta"
|
|
7
|
+
|
|
8
|
+
[tool.setuptools-git-versioning]
|
|
9
|
+
enabled = true
|
|
10
|
+
|
|
11
|
+
[tool.setuptools.packages.find]
|
|
12
|
+
where = ["src"]
|
|
13
|
+
|
|
14
|
+
[project]
|
|
15
|
+
name = "thinkpack"
|
|
16
|
+
# version = "0.0.0"
|
|
17
|
+
dynamic = ["version"]
|
|
18
|
+
description = "Tools for preventing think collapse in reasoning language models."
|
|
19
|
+
readme = "README.md"
|
|
20
|
+
authors = [{ name = "Lukas Twist", email = "itsluketwist@gmail.com" }]
|
|
21
|
+
license = { file = "LICENSE" }
|
|
22
|
+
classifiers = [
|
|
23
|
+
"License :: OSI Approved :: MIT License",
|
|
24
|
+
"Programming Language :: Python",
|
|
25
|
+
"Programming Language :: Python :: 3",
|
|
26
|
+
]
|
|
27
|
+
keywords = ["thinkpack", "llm", "reasoning", "think-collapse", "fine-tuning"]
|
|
28
|
+
requires-python = ">=3.11"
|
|
29
|
+
|
|
30
|
+
dependencies = [
|
|
31
|
+
"datasets",
|
|
32
|
+
"transformers",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = [
|
|
37
|
+
"pre-commit",
|
|
38
|
+
"pytest",
|
|
39
|
+
"pytest-cov",
|
|
40
|
+
"ty",
|
|
41
|
+
"uv",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[project.urls]
|
|
45
|
+
Homepage = "https://github.com/itsluketwist/thinkpack"
|
|
46
|
+
|
|
47
|
+
[tool.uv]
|
|
48
|
+
exclude-newer = "7 days"
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""ThinkPack — tools for preventing think collapse in reasoning language models."""
|
|
2
|
+
|
|
3
|
+
from thinkpack._model import ModelInfo, TemplateStyle, detect_model
|
|
4
|
+
from thinkpack.distill import build_prompts, extract_reasoning, update_records
|
|
5
|
+
from thinkpack.hybrid import HybridResult, hybrid_generate
|
|
6
|
+
from thinkpack.mask import Mask, mask
|
|
7
|
+
from thinkpack.parse import ParsedResponse, parse, parse_all, parse_output
|
|
8
|
+
from thinkpack.steer import SimplePrefix, apply_steer_template, steer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"ModelInfo",
|
|
13
|
+
"TemplateStyle",
|
|
14
|
+
"detect_model",
|
|
15
|
+
"build_prompts",
|
|
16
|
+
"extract_reasoning",
|
|
17
|
+
"update_records",
|
|
18
|
+
"HybridResult",
|
|
19
|
+
"hybrid_generate",
|
|
20
|
+
"Mask",
|
|
21
|
+
"mask",
|
|
22
|
+
"ParsedResponse",
|
|
23
|
+
"parse",
|
|
24
|
+
"parse_all",
|
|
25
|
+
"parse_output",
|
|
26
|
+
"SimplePrefix",
|
|
27
|
+
"apply_steer_template",
|
|
28
|
+
"steer",
|
|
29
|
+
]
|