thinkpack 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thinkpack/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """ThinkPack — tools for preventing think collapse in reasoning language models."""
2
+
3
+ from thinkpack._model import ModelInfo, TemplateStyle, detect_model
4
+ from thinkpack.distill import build_prompts, extract_reasoning, update_records
5
+ from thinkpack.hybrid import HybridResult, hybrid_generate
6
+ from thinkpack.mask import Mask, mask
7
+ from thinkpack.parse import ParsedResponse, parse, parse_all, parse_output
8
+ from thinkpack.steer import SimplePrefix, apply_steer_template, steer
9
+
10
+
11
+ __all__ = [
12
+ "ModelInfo",
13
+ "TemplateStyle",
14
+ "detect_model",
15
+ "build_prompts",
16
+ "extract_reasoning",
17
+ "update_records",
18
+ "HybridResult",
19
+ "hybrid_generate",
20
+ "Mask",
21
+ "mask",
22
+ "ParsedResponse",
23
+ "parse",
24
+ "parse_all",
25
+ "parse_output",
26
+ "SimplePrefix",
27
+ "apply_steer_template",
28
+ "steer",
29
+ ]
thinkpack/_model.py ADDED
@@ -0,0 +1,124 @@
1
+ """Model template style detection from tokenizer chat templates."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from enum import StrEnum
6
+
7
+
8
+ class TemplateStyle(StrEnum):
9
+ """
10
+ How a model's chat template handles reasoning blocks.
11
+
12
+ INLINE — standard: the model outputs <think>content</think> inline in its response.
13
+ No special template support; tags are injected and parsed as plain text.
14
+ NATIVE — the template has a dedicated reasoning_content field (e.g. Qwen3).
15
+ Reasoning is passed separately when building messages and rendered
16
+ inside the think block by the template itself.
17
+ PREFIXED — the template auto-injects an opening reasoning tag at the end of the
18
+ generation prompt. The model's decoded output begins mid-reasoning
19
+ (no opening tag visible), and always ends with a closing tag.
20
+ """
21
+
22
+ INLINE = "inline"
23
+ NATIVE = "native"
24
+ PREFIXED = "prefixed"
25
+
26
+
27
@dataclass
class ModelInfo:
    """Result of template-style detection for one model.

    Produced once by detect_model() and consumed internally by mask() and
    steer(), which use it to apply model-specific formatting without
    surfacing any flags to the caller.
    """

    # how this model's chat template handles reasoning blocks
    style: TemplateStyle
    # opening tag the model uses, e.g. "<think>", "<reasoning>", "<thought>"
    open_tag: str
38
+
39
+
40
# default opening tag used when the model has no known preference
_DEFAULT_OPEN_TAG = "<think>"

# sentinel injected into a test message to detect native reasoning_content support —
# if it appears in the rendered output, the template handles reasoning natively
_NATIVE_SENTINEL = "__thinkpack_detect__"

# matches any xml-like opening tag at the end of a string, e.g. <think>, <reasoning>
# (\s*$ tolerates trailing whitespace/newlines the template may emit after the tag)
_TRAILING_TAG = re.compile(r"<([a-zA-Z][a-zA-Z0-9_]*)>\s*$")

# cache keyed on the chat_template string — the template fully determines detection,
# and is stable for the lifetime of any real tokenizer instance
# NOTE(review): unbounded module-level cache; fine in practice since a process
# typically sees a handful of distinct templates
_cache: dict[str, ModelInfo] = {}
53
+
54
+
55
def detect_model(tokenizer: object) -> ModelInfo:
    """
    Detect how a tokenizer handles reasoning blocks from its chat template.

    Checks for native reasoning_content support (NATIVE), a generation prompt
    that auto-injects an opening reasoning tag (PREFIXED), or neither (INLINE).
    Detection is fully behaviour-based — no template source scanning: the
    template is exercised with probe messages and its output is inspected.

    Results are memoised per chat_template string, so repeated calls with the
    same tokenizer (or tokenizers sharing a template) are cheap.

    Returns a ModelInfo with the detected TemplateStyle and open_tag.
    """
    # an absent or None chat_template normalises to "" so it can key the cache
    template = getattr(tokenizer, "chat_template", "") or ""
    if cached := _cache.get(template):
        return cached

    # test for native reasoning_content support by rendering an assistant message
    # with a sentinel value — if the sentinel appears in output, the template
    # handles reasoning as a dedicated field rather than inline tags (e.g. Qwen3)
    try:
        out = tokenizer.apply_chat_template(  # type: ignore
            [
                {"role": "user", "content": ""},
                {
                    "role": "assistant",
                    "content": "",
                    "reasoning_content": _NATIVE_SENTINEL,
                },
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        # some tokenizers return token ids despite tokenize=False — decode them
        if isinstance(out, list):
            out = tokenizer.decode(out)  # type: ignore
        if _NATIVE_SENTINEL in out:
            # extract the actual tag the template wraps reasoning in, e.g. <think>:
            # look for the nearest opening tag preceding the sentinel
            tag_match = re.search(
                r"<([a-zA-Z][a-zA-Z0-9_]*)>[^<]*" + re.escape(_NATIVE_SENTINEL),
                out,
            )
            native_tag = f"<{tag_match.group(1)}>" if tag_match else _DEFAULT_OPEN_TAG
            result = ModelInfo(style=TemplateStyle.NATIVE, open_tag=native_tag)
            _cache[template] = result
            return result
    except Exception:
        pass  # template doesn't support this message structure — move on

    # apply with add_generation_prompt=True and check if any xml-like opening tag
    # was appended — if so, this is a PREFIXED model and we capture the tag name
    # NOTE(review): this second call is deliberately unguarded — presumably a
    # template that cannot render a plain user message should fail loudly rather
    # than be misclassified as INLINE; confirm that is the intended contract
    gen_prompt = tokenizer.apply_chat_template(  # type: ignore
        [{"role": "user", "content": ""}],
        tokenize=False,
        add_generation_prompt=True,
    )
    if isinstance(gen_prompt, list):
        # some tokenizers return token ids despite tokenize=False — decode them
        gen_prompt = tokenizer.decode(gen_prompt)  # type: ignore

    match = _TRAILING_TAG.search(gen_prompt)
    if match:
        result = ModelInfo(
            style=TemplateStyle.PREFIXED,
            open_tag=f"<{match.group(1)}>",
        )
    else:
        result = ModelInfo(
            style=TemplateStyle.INLINE,
            open_tag=_DEFAULT_OPEN_TAG,
        )

    _cache[template] = result
    return result
thinkpack/_tags.py ADDED
@@ -0,0 +1,16 @@
1
+ """Shared regex patterns for reasoning block tags."""
2
+
3
+ import re
4
+
5
+
6
+ # matches any opening reasoning tag, e.g. <think>, <thinking>, <reasoning>, <thought>
7
+ OPEN_TAG = re.compile(
8
+ r"<(think|thinking|reasoning|thought)>",
9
+ re.IGNORECASE,
10
+ )
11
+
12
+ # matches any closing reasoning tag, e.g. </think>, </thinking>, etc.
13
+ CLOSE_TAG = re.compile(
14
+ r"</(think|thinking|reasoning|thought)>",
15
+ re.IGNORECASE,
16
+ )
thinkpack/distill.py ADDED
@@ -0,0 +1,166 @@
1
+ """Distillation utilities for constructing reasoning prompts and extracting reasoning traces."""
2
+
3
+ import re
4
+ from typing import overload
5
+
6
+ from thinkpack.parse import parse
7
+
8
+
9
+ # default preamble used when none is provided — presents the task as a
10
+ # backwards explanation: given the answer, produce the reasoning that leads to it
11
+ _DEFAULT_PREAMBLE = (
12
+ "Given the following question and its correct answer, "
13
+ "produce a step-by-step reasoning trace that "
14
+ "explains how to arrive at the answer."
15
+ )
16
+
17
+
18
+ def build_prompts(
19
+ records: list[dict[str, str]],
20
+ instruction_key: str = "instruction",
21
+ response_key: str = "response",
22
+ tag: str = "reasoning_trace",
23
+ preamble: str = _DEFAULT_PREAMBLE,
24
+ example: str | None = None,
25
+ ) -> list[str]:
26
+ """
27
+ Build construct-mode distillation prompts from a list of records.
28
+
29
+ Each prompt presents the question and correct answer, asking the model
30
+ to produce a reasoning trace inside the specified tag. The closing tag
31
+ should be configured as a stop token so the model stops after reasoning.
32
+
33
+ Returns a list of prompt strings, one per record.
34
+ """
35
+ prompts = []
36
+ for record in records:
37
+ instruction = record[instruction_key]
38
+ response = record[response_key]
39
+
40
+ # build the example block only if one was provided
41
+ if example is not None:
42
+ example_block = f"Here is an example:\n<{tag}>\n{example}\n</{tag}>\n\n"
43
+ else:
44
+ example_block = ""
45
+
46
+ prompt = (
47
+ f"{preamble}\n\n"
48
+ f"Question: {instruction}\n\n"
49
+ f"Answer: {response}\n\n"
50
+ f"{example_block}"
51
+ f"Provide your reasoning inside <{tag}> tags."
52
+ )
53
+ prompts.append(prompt)
54
+
55
+ return prompts
56
+
57
+
58
+ @overload
59
+ def extract_reasoning(
60
+ text: str,
61
+ tag: str | None = ...,
62
+ prefixed: bool = ...,
63
+ strip_think: bool = ...,
64
+ ) -> str | None: ...
65
+
66
+
67
+ @overload
68
+ def extract_reasoning(
69
+ text: list[str],
70
+ tag: str | None = ...,
71
+ prefixed: bool = ...,
72
+ strip_think: bool = ...,
73
+ ) -> list[str | None]: ...
74
+
75
+
76
+ def extract_reasoning(
77
+ text: str | list[str],
78
+ tag: str | None = None,
79
+ prefixed: bool = False,
80
+ strip_think: bool = True,
81
+ ) -> str | None | list[str | None]:
82
+ """
83
+ Extract a reasoning trace from a model response or a list of responses.
84
+
85
+ Accepts a single string or a list; the return type matches the input.
86
+ Delegates to parse() for standard think/reasoning/thought tags, including
87
+ the truncated case where the closing tag is a stop token.
88
+
89
+ For custom tags (e.g. "reasoning_trace"), finds the opening tag and takes
90
+ everything after it — the closing tag is assumed to be a stop token and
91
+ absent from the output.
92
+
93
+ Returns the extracted reasoning string (or None if not found / blank) for
94
+ a single input, or a list of the same for a list input.
95
+ """
96
+ if isinstance(text, list):
97
+ return [
98
+ extract_reasoning( # type: ignore[misc]
99
+ text=t,
100
+ tag=tag,
101
+ prefixed=prefixed,
102
+ strip_think=strip_think,
103
+ )
104
+ for t in text
105
+ ]
106
+
107
+ if tag is None:
108
+ # delegate to parse() which handles all standard reasoning tags and
109
+ # the truncated case (open tag, no close tag = stop token scenario)
110
+ parsed = parse(response=text, prefixed=prefixed)
111
+ content = parsed.reasoning.strip()
112
+ return content if content else None
113
+
114
+ # custom tag mode: the closing tag is a stop token and never present,
115
+ # so find the opening tag and take everything after it
116
+ if strip_think:
117
+ # strip any standard think block first (its </think> is NOT a stop
118
+ # token in this mode, so it will appear in the output)
119
+ parsed = parse(response=text, prefixed=prefixed)
120
+ search_text = parsed.answer
121
+ else:
122
+ search_text = text
123
+
124
+ open_tag_re = re.compile(rf"<{re.escape(tag)}>", re.IGNORECASE)
125
+ match = open_tag_re.search(search_text)
126
+ if match is None:
127
+ return None
128
+
129
+ content = search_text[match.end() :].strip()
130
+ return content if content else None
131
+
132
+
133
def update_records(
    records: list[dict[str, str]],
    responses: list[str],
    field: str = "reasoning_constructed",
    tag: str | None = None,
    prefixed: bool = False,
    strip_think: bool = True,
) -> list[dict[str, str]]:
    """
    Add extracted reasoning traces into a list of records.

    Runs extract_reasoning over each response and stores the result in the
    matching record under `field`. Records whose extraction yields None are
    copied through unchanged. Input records are never mutated.

    Returns a new list of record dicts with the reasoning field added
    where extraction succeeded.
    """
    # single list-path call handles all responses at once
    traces: list[str | None] = extract_reasoning(
        text=responses,
        tag=tag,
        prefixed=prefixed,
        strip_think=strip_think,
    )

    # strict=True surfaces a records/responses length mismatch immediately
    updated: list[dict[str, str]] = []
    for record, trace in zip(records, traces, strict=True):
        copy = dict(record)
        if trace is not None:
            copy[field] = trace
        updated.append(copy)
    return updated
thinkpack/hybrid.py ADDED
@@ -0,0 +1,135 @@
1
+ """
2
+ Hybrid decoding: base model generates reasoning, fine-tuned adapter generates the answer.
3
+
4
+ Requires vLLM. The LLM must be loaded with enable_lora=True so the adapter
5
+ can be toggled between the two generation phases without reloading the model.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any, Protocol
10
+
11
+ from thinkpack.parse import parse
12
+
13
+
14
class _LLM(Protocol):
    """Minimal protocol for a vLLM-compatible LLM instance."""

    def generate(
        self,
        prompts: list[str],
        sampling_params: Any,
        lora_request: Any = None,
    ) -> list[Any]:
        """Generate one output object per prompt; lora_request=None runs the
        base model without any adapter (per vLLM's LLM.generate interface)."""
        ...
23
+
24
+
25
@dataclass
class HybridResult:
    """Result of one hybrid-decoding generation.

    Bundles the base model's reasoning, the fine-tuned model's answer, and
    a reconstructed combined string for callers that want a single text.
    """

    # content of the reasoning block, produced by the base model in phase 1
    reasoning: str
    # final answer text, produced by the fine-tuned model in phase 2
    answer: str
    # convenience reconstruction: "<tag>reasoning</tag>\nanswer"
    raw: str
39
+
40
+
41
+ def _build_reasoning_prefix(reasoning: str, tag: str) -> str:
42
+ """Wrap a reasoning string in its original tag to form a closed block.
43
+
44
+ Used to construct the phase 2 prompt prefix so the fine-tuned model
45
+ receives the full reasoning block before generating its answer.
46
+
47
+ Returns a string of the form "<tag>\\nreasoning\\n</tag>\\n".
48
+ """
49
+ return f"<{tag}>\n{reasoning}\n</{tag}>\n"
50
+
51
+
52
def hybrid_generate(
    prompts: list[str],
    llm: _LLM,
    lora_request: Any,
    sampling_params: Any,
    prefixed: bool = False,
) -> list[HybridResult]:
    """Run hybrid decoding: base model reasons, fine-tuned model answers.

    Two sequential generation passes over the same loaded vLLM model:

    Phase 1 — reasoning (no adapter):
        Generates with lora_request=None so the base model produces a
        reasoning block freely, without fine-tuning influence.

    Phase 2 — answer (with adapter):
        Prepends the reasoning block from phase 1 to each prompt, then
        generates with the provided lora_request so the fine-tuned model
        produces the final answer conditioned on that reasoning.

    The LLM must be loaded with enable_lora=True to support toggling the
    adapter between phases. Pass the same SamplingParams to both phases;
    for different per-phase params, use steer() and parse() directly.

    Set prefixed=True for PREFIXED template models (e.g. OLMo-3) whose
    generation prompt already ends with the opening reasoning tag — used
    when parsing the phase 1 outputs.

    Returns a list of HybridResult, one per prompt.

    Raises ValueError if the backend returns a different number of outputs
    than prompts (the strict zips below guard against silent misalignment).
    """
    # phase 1: base model generates reasoning (no adapter)
    phase1_outputs = llm.generate(
        prompts=prompts,
        sampling_params=sampling_params,
        lora_request=None,
    )

    # parse reasoning from each phase 1 output (first sample only)
    phase1_parsed = [
        parse(
            response=output.outputs[0].text,  # type: ignore[union-attr]
            prefixed=prefixed,
        )
        for output in phase1_outputs
    ]

    # build phase 2 prompts: original prompt + closed reasoning block.
    # use the detected tag from phase 1, falling back to "think" if not found.
    # strict=True (consistent with update_records) fails loudly if the backend
    # returned a different number of outputs than prompts.
    phase2_prompts = []
    for prompt, p1 in zip(prompts, phase1_parsed, strict=True):
        if p1.has_valid_reasoning:
            tag = p1.reasoning_tag or "think"
            prefix = _build_reasoning_prefix(reasoning=p1.reasoning, tag=tag)
            phase2_prompts.append(prompt + prefix)
        else:
            # no usable reasoning — phase 2 proceeds without a prefix
            phase2_prompts.append(prompt)

    # phase 2: fine-tuned model generates answer (with adapter)
    phase2_outputs = llm.generate(
        prompts=phase2_prompts,
        sampling_params=sampling_params,
        lora_request=lora_request,
    )

    # combine reasoning and answer into HybridResult objects
    results = []
    for p1, output in zip(phase1_parsed, phase2_outputs, strict=True):
        answer = output.outputs[0].text.strip()  # type: ignore[union-attr]
        tag = p1.reasoning_tag or "think"
        raw = (
            _build_reasoning_prefix(reasoning=p1.reasoning, tag=tag) + answer
            if p1.has_valid_reasoning
            else answer
        )
        results.append(
            HybridResult(
                reasoning=p1.reasoning,
                answer=answer,
                raw=raw,
            )
        )

    return results
thinkpack/mask.py ADDED
@@ -0,0 +1,257 @@
1
+ """Training-time loss masking for reasoning blocks."""
2
+
3
+ import re
4
+ from enum import IntFlag
5
+
6
+ from datasets import Dataset
7
+
8
+ from thinkpack._model import TemplateStyle, detect_model
9
+
10
+
11
# pytorch's CrossEntropyLoss uses ignore_index=-100 by default, and all major
# training frameworks (transformers Trainer, trl SFTTrainer, unsloth) inherit
# this default — so -100 is the correct value unless the trainer is configured
# otherwise. exposed as a parameter on mask() for the rare case where it differs.
_DEFAULT_IGNORE_INDEX = -100
16
+
17
+
18
class Mask(IntFlag):
    """
    Flag set naming which parts of a training sequence to exclude from the loss.

    Flags combine with the | operator:
        Mask.THINK                — mask only the think block (the common case)
        Mask.PROMPT | Mask.THINK  — train on the response alone

    PROMPT is the user instruction; THINK is the whole reasoning block,
    opening and closing tags included; RESPONSE is the model's answer.
    Masking RESPONSE leaves nothing useful to train on, but is permitted.
    """

    PROMPT = 1
    THINK = 2
    RESPONSE = 4
34
+
35
+
36
def _build_assistant_message(
    record: dict[str, str],
    style: TemplateStyle,
    open_tag: str,
) -> dict[str, str]:
    """
    Build the assistant message dict for a training record.

    NATIVE templates get reasoning as a separate reasoning_content field;
    INLINE and PREFIXED templates get it wrapped in inline reasoning tags.
    Whether the record carries a "reasoning" key (even an empty one) decides
    whether a think block appears at all — when masking, the block must be
    present so training context matches what the model sees at inference.

    Returns an assistant message dict ready for apply_chat_template.
    """
    # .get() returns None when the key is absent, distinguishing "no reasoning
    # key" from "reasoning key present but empty"
    reasoning_raw = record.get("reasoning")
    response = record["response"]

    message: dict[str, str] = {"role": "assistant"}

    if reasoning_raw is None:
        # no reasoning key at all — plain answer-only assistant turn
        message["content"] = response
        return message

    reasoning = reasoning_raw.strip()
    if style == TemplateStyle.NATIVE:
        # template renders reasoning itself via a dedicated field (e.g. Qwen3)
        message["content"] = response
        message["reasoning_content"] = reasoning
    else:
        # inline: derive the closing tag from the opening one, <think> -> </think>
        close_tag = open_tag.replace("<", "</", 1)
        message["content"] = f"{open_tag}\n{reasoning}\n{close_tag}\n{response}"

    return message
71
+
72
+
73
+ def _tokenize_prefix(
74
+ tokenizer: object,
75
+ text: str,
76
+ max_seq_length: int,
77
+ ) -> int:
78
+ """
79
+ Tokenize a text prefix and return its token count.
80
+
81
+ Used to locate section boundaries within the full token sequence by
82
+ tokenizing the text up to a known character position.
83
+
84
+ Returns the number of tokens in the prefix.
85
+ """
86
+ return len(
87
+ tokenizer.encode( # type: ignore
88
+ text,
89
+ add_special_tokens=False,
90
+ truncation=True,
91
+ max_length=max_seq_length,
92
+ )
93
+ )
94
+
95
+
96
def _tokenize_record(
    record: dict[str, str],
    tokenizer: object,
    style: TemplateStyle,
    open_tag: str,
    max_seq_length: int,
    masked: Mask,
    ignore_index: int,
) -> dict[str, list[int]]:
    """
    Tokenize a single training record and apply label masking.

    Locates the PROMPT / THINK / RESPONSE boundaries in the token sequence by
    tokenizing text prefixes (rather than using add_generation_prompt=True). This
    avoids a subtle issue with PREFIXED templates: the generation prompt already
    ends with <think>, so using it as a prefix boundary would leave the opening
    tag trainable while masking the closing tag — teaching the model to "open but
    never close" the reasoning block.

    Each section flagged in `masked` has its labels set to `ignore_index` so
    PyTorch's cross-entropy ignores those tokens during loss computation.

    Returns a dict with input_ids, labels, and attention_mask.
    """
    messages = [
        {"role": "user", "content": record["instruction"]},
        _build_assistant_message(
            record=record,
            style=style,
            open_tag=open_tag,
        ),
    ]
    full_text = tokenizer.apply_chat_template(  # type: ignore
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    input_ids = tokenizer.encode(  # type: ignore
        full_text,
        add_special_tokens=False,
        truncation=True,
        max_length=max_seq_length,
    )

    # default: all tokens contribute to the loss
    labels = list(input_ids)

    if not masked:
        # no sections to mask — return labels unchanged
        return {
            "input_ids": input_ids,
            "labels": labels,
            "attention_mask": [1] * len(input_ids),
        }

    # find the opening reasoning tag to locate the think block boundary
    # (plain substring search — the tag is a literal, no regex needed)
    open_pos = full_text.find(open_tag)
    think_start = (
        _tokenize_prefix(
            tokenizer=tokenizer,
            text=full_text[:open_pos],
            max_seq_length=max_seq_length,
        )
        if open_pos != -1
        else None  # no think block present in this record
    )

    # locate the response boundary (rfind to handle response text that also
    # appears inside the instruction). if the template transformed the response
    # so it no longer appears verbatim, rfind returns -1 — clamp to the end of
    # the sequence instead of silently slicing with a negative index, which
    # would put the boundary one character before the end.
    response_start_char = full_text.rfind(record["response"])
    if response_start_char == -1:
        response_start_char = len(full_text)
    response_start = _tokenize_prefix(
        tokenizer=tokenizer,
        text=full_text[:response_start_char],
        max_seq_length=max_seq_length,
    )

    # mask each requested section independently
    if Mask.PROMPT in masked:
        # mask everything from the start up to the think block (or response if no think)
        prompt_end = think_start if think_start is not None else response_start
        for i in range(prompt_end):
            labels[i] = ignore_index

    if Mask.THINK in masked and think_start is not None:
        # mask the full reasoning block including its opening and closing tags
        for i in range(think_start, response_start):
            labels[i] = ignore_index

    if Mask.RESPONSE in masked:
        # mask the response tokens through the end of the sequence
        for i in range(response_start, len(labels)):
            labels[i] = ignore_index

    return {
        "input_ids": input_ids,
        "labels": labels,
        "attention_mask": [1] * len(input_ids),
    }
193
+
194
+
195
def mask(
    records: list[dict[str, str]],
    tokenizer: object,
    masked: Mask | None = Mask.THINK,
    max_seq_length: int = 32768,
    ignore_index: int = _DEFAULT_IGNORE_INDEX,
    tag: str | None = None,
) -> Dataset:
    """
    Format training records into a pretokenized dataset with selected sections masked.

    Records need "instruction" and "response" keys; an optional "reasoning" key
    supplies think-block content. When masking is active and a record lacks that
    key, an empty reasoning block is injected so training context matches what
    the model sees at inference time.

    Template style (INLINE, NATIVE, PREFIXED) is detected automatically from
    the tokenizer. Combine Mask flags with | to mask several sections at once;
    masked=None trains on every token.

    Returns a HuggingFace Dataset with input_ids, labels, and attention_mask columns.
    """
    model_info = detect_model(tokenizer=tokenizer)

    # an explicit tag overrides the detected default — useful for INLINE models
    # whose tag differs from <think>, e.g. <reasoning>
    open_tag = model_info.open_tag if tag is None else f"<{tag}>"

    # treat None as "mask nothing" so the rest of the pipeline sees one type
    effective_masked = Mask(0) if masked is None else masked

    # inject an empty "reasoning" key wherever one is missing while masking is
    # active, so the think block always appears in the sequence — required for
    # training/inference context alignment on PREFIXED models, which always
    # emit think blocks at inference time
    if effective_masked:
        records = [
            record if "reasoning" in record else {**record, "reasoning": ""}
            for record in records
        ]

    tokenized = [
        _tokenize_record(
            record=record,
            tokenizer=tokenizer,
            style=model_info.style,
            open_tag=open_tag,
            max_seq_length=max_seq_length,
            masked=effective_masked,
            ignore_index=ignore_index,
        )
        for record in records
    ]

    # transpose the per-record dicts into per-column lists for Dataset.from_dict
    columns = ("input_ids", "labels", "attention_mask")
    return Dataset.from_dict(
        {column: [row[column] for row in tokenized] for column in columns}
    )
thinkpack/parse.py ADDED
@@ -0,0 +1,173 @@
1
+ """Parsing of model responses into reasoning and answer components."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import cast
6
+
7
+ from thinkpack._tags import CLOSE_TAG as _REASONING_CLOSE_TAG
8
+ from thinkpack._tags import OPEN_TAG as _REASONING_OPEN_TAG
9
+
10
+
11
+ @dataclass
12
+ class ParsedResponse:
13
+ """
14
+ A model response split into reasoning and answer components.
15
+
16
+ answer and reasoning contain the extracted text; the boolean flags
17
+ describe the structure of the reasoning block at a glance.
18
+ """
19
+
20
+ # the model's final answer — text after the closing reasoning tag, or the
21
+ # full response if no reasoning block was found
22
+ answer: str
23
+
24
+ # content inside the reasoning block; empty string if no block was present
25
+ reasoning: str
26
+
27
+ # lowercase tag name used, e.g. "think", "reasoning" (None if no tag found)
28
+ reasoning_tag: str | None
29
+
30
+ # true if any reasoning block structure is present, even if blank or truncated
31
+ has_reasoning_block: bool
32
+
33
+ # true if the reasoning block was completed and non-blank
34
+ has_valid_reasoning: bool
35
+
36
+ # true if an opening tag was found but the model never produced a closing tag
37
+ has_truncated_reasoning: bool
38
+
39
+
40
def parse(
    response: str,
    prefixed: bool = False,
    tag: str | None = None,
) -> ParsedResponse:
    """Split a single model response into reasoning and answer components.

    Four shapes are recognised:
      - standard:           <think>content</think>answer
      - prefixed:           content</think>answer (open tag injected by template)
      - truncated standard: <think>content... (open tag, never closed)
      - truncated prefixed: content... (no tags; detectable only with prefixed=True)

    Returns a ParsedResponse carrying the split text and status flags.
    """
    # an explicit tag narrows matching to that one name; otherwise use the
    # shared patterns covering every known reasoning tag variant
    if tag is None:
        open_re = _REASONING_OPEN_TAG
        close_re = _REASONING_CLOSE_TAG
    else:
        escaped = re.escape(tag)
        open_re = re.compile(rf"<({escaped})>", re.IGNORECASE)
        close_re = re.compile(rf"</({escaped})>", re.IGNORECASE)

    closed = close_re.search(response)
    if closed is not None:
        # complete block: take everything before the close tag, removing one
        # leading open tag if present (prefixed outputs carry none)
        body = open_re.sub("", response[: closed.start()], count=1)
        return ParsedResponse(
            answer=response[closed.end() :].strip(),
            reasoning=body,
            reasoning_tag=closed.group(1).lower(),
            has_reasoning_block=True,
            has_valid_reasoning=bool(body.strip()),
            has_truncated_reasoning=False,
        )

    opened = open_re.search(response)
    if opened is not None:
        # reasoning started but generation stopped before the close tag
        return ParsedResponse(
            answer="",
            reasoning=response[opened.end() :],
            reasoning_tag=opened.group(1).lower(),
            has_reasoning_block=True,
            has_valid_reasoning=False,
            has_truncated_reasoning=True,
        )

    if prefixed:
        # PREFIXED templates inject the opening tag before decoding begins, so
        # a missing close tag here means truncated reasoning — not "no block"
        return ParsedResponse(
            answer="",
            reasoning=response,
            reasoning_tag=None,
            has_reasoning_block=True,
            has_valid_reasoning=False,
            has_truncated_reasoning=True,
        )

    # no reasoning tags anywhere — a plain response with no think block
    return ParsedResponse(
        answer=response,
        reasoning="",
        reasoning_tag=None,
        has_reasoning_block=False,
        has_valid_reasoning=False,
        has_truncated_reasoning=False,
    )
119
+
120
+
121
def parse_all(
    responses: list[list[str]],
    prefixed: bool = False,
    tag: str | None = None,
) -> list[list[ParsedResponse]]:
    """Parse a batch of model responses into ParsedResponse objects.

    Input is a nested list of shape [task][sample]; output mirrors that
    shape exactly. Pass tag to restrict matching to one tag name (see
    parse() for details).
    """
    parsed: list[list[ParsedResponse]] = []
    for sample_responses in responses:
        parsed.append(
            [parse(response=r, prefixed=prefixed, tag=tag) for r in sample_responses]
        )
    return parsed
137
+
138
+
139
def parse_output(
    output: object | list[object],
    prefixed: bool = False,
    tag: str | None = None,
) -> list[ParsedResponse] | list[list[ParsedResponse]]:
    """Parse one or more generation output objects into ParsedResponse objects.

    Two accepted inputs:
      - a single object exposing .outputs (e.g. a vLLM RequestOutput) —
        yields a flat list with one ParsedResponse per completion;
      - a list of such objects — yields a nested [task][sample] structure.

    Each completion is expected to expose a .text string attribute,
    matching vLLM's RequestOutput and similar interfaces. Pass tag to
    restrict matching to one tag name (see parse() for details).
    """
    if isinstance(output, list):
        # recurse per output object to build the [task][sample] nesting
        nested = [
            parse_output(output=item, prefixed=prefixed, tag=tag) for item in output
        ]
        return cast(list[list[ParsedResponse]], nested)

    # single output object — one ParsedResponse per completion in .outputs
    return [
        parse(response=completion.text, prefixed=prefixed, tag=tag)
        for completion in output.outputs  # type: ignore
    ]
thinkpack/steer.py ADDED
@@ -0,0 +1,138 @@
1
+ """Inference-time thought-steering prefix injection."""
2
+
3
+ from enum import StrEnum
4
+
5
+ from thinkpack._model import detect_model
6
+
7
+
8
+ class SimplePrefix(StrEnum):
9
+ """
10
+ A small set of basic steering prefixes for common use cases.
11
+
12
+ These are provided as convenient starting points — pass any string to
13
+ steer() to use a custom prefix instead.
14
+ """
15
+
16
+ # minimal opening; lets the model continue naturally with a slight nudge
17
+ BRIEF = "Okay, "
18
+ # explicit step-by-step framing
19
+ STEPS = "Okay, let me think this through step by step."
20
+ # step-by-step framing with a reminder to stay concise and produce an answer
21
+ CONCISE = (
22
+ "Okay, let me think this through, "
23
+ "but I need to be concise and make sure I also provide an answer."
24
+ )
25
+
26
+
27
def steer(
    prompts: list[str],
    tokenizer: object,
    prefix: SimplePrefix | str | None = None,
    tag: str | None = None,
    close: bool = False,
) -> list[str]:
    """Inject a thought-steering prefix into chat-templated prompt strings.

    Ensures each prompt ends with an open reasoning block, optionally seeded
    with a prefix string to guide the model's thinking. Use SimplePrefix for
    common presets, or pass any string for a custom prefix. Template style
    (INLINE, NATIVE, PREFIXED) is detected automatically from the tokenizer.

    The prompts should already be chat-templated strings (e.g. as returned by
    tokenizer.apply_chat_template with add_generation_prompt=True).

    When close=True, the reasoning block is closed after the prefix, producing
    a complete <think>...</think> block. The model then generates its response
    after the closed block. This is useful as a universal interface for injecting
    a fixed reasoning block rather than steering an open-ended thought.

    Returns a list of prompt strings ready to pass directly to a generation function.
    """
    model_info = detect_model(tokenizer=tokenizer)
    # a user-supplied tag overrides the detected default (useful for INLINE
    # models whose tag differs from <think>, e.g. <reasoning>)
    open_tag = model_info.open_tag if tag is None else f"<{tag}>"
    # derive the closing tag by inserting "/" after the opening "<"
    close_tag = open_tag.replace("<", "</", 1)

    results: list[str] = []
    for prompt in prompts:
        # a PREFIXED template may already end with the open tag (possibly
        # followed by trailing newlines) — don't inject a duplicate
        has_open_tag = prompt.rstrip("\n").endswith(open_tag)

        if prefix is None:
            # no seeded thought: ensure an open tag, optionally close it empty
            suffix = "" if has_open_tag else open_tag + "\n"
            if close:
                suffix += close_tag + "\n"
        else:
            # seed the thought body; the tag (or a newline after an existing
            # tag) separates it from the templated prompt
            suffix = ("\n" if has_open_tag else open_tag + "\n") + str(prefix)
            if close:
                suffix += "\n" + close_tag + "\n"

        results.append(prompt + suffix)

    return results
101
+
102
+
103
def apply_steer_template(
    conversations: list[list[dict[str, str]]],
    tokenizer: object,
    prefix: SimplePrefix | str | None = None,
    tag: str | None = None,
    close: bool = False,
) -> list[str]:
    """Apply the chat template and inject a thought-steering prefix in one step.

    Convenience wrapper that combines tokenizer.apply_chat_template() and steer()
    into a single call. Accepts a list of conversations (each a list of message dicts
    with "role" and "content" keys) and returns steered prompt strings ready for
    generation. Pass close=True to produce a complete closed reasoning block.

    Returns a list of steered prompt strings, one per conversation.
    """

    def _render(messages: list[dict[str, str]]) -> str:
        # template a single conversation into a prompt string
        rendered = tokenizer.apply_chat_template(  # type: ignore
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        if isinstance(rendered, list):
            # some tokenizers return token ids despite tokenize=False — decode them
            rendered = tokenizer.decode(rendered)  # type: ignore
        return rendered

    return steer(
        prompts=[_render(messages) for messages in conversations],
        tokenizer=tokenizer,
        prefix=prefix,
        tag=tag,
        close=close,
    )
@@ -0,0 +1,268 @@
1
+ Metadata-Version: 2.4
2
+ Name: thinkpack
3
+ Version: 0.0.2
4
+ Summary: Tools for preventing think collapse in reasoning language models.
5
+ Author-email: Lukas Twist <itsluketwist@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2023 Luke Twist
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/itsluketwist/thinkpack
29
+ Keywords: thinkpack,llm,reasoning,think-collapse,fine-tuning
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Programming Language :: Python
32
+ Classifier: Programming Language :: Python :: 3
33
+ Requires-Python: >=3.11
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: datasets
37
+ Requires-Dist: transformers
38
+ Dynamic: license-file
39
+
40
+ # ThinkPack
41
+
42
+ ![ThinkPack](assets/banner.png)
43
+
44
+ A lightweight toolkit for working with reasoning blocks in language models — preventing think collapse via loss masking, steering reasoning at inference time, and parsing model outputs.
45
+
46
+ **Think collapse** is a failure mode where reasoning models stop using their `<think>...</think>` blocks during or after fine-tuning.
47
+ Without intervention, the model learns to skip reasoning entirely — producing answers directly and losing the chain-of-thought behaviour it was trained on.
48
+ ThinkPack provides three targeted tools to prevent this:
49
+
50
+ - **Loss masking** (`thinkpack.mask`) — keeps reasoning blocks in the training context while masking them from the loss, so the model doesn't learn to skip them.
51
+ - **Thought steering** (`thinkpack.steer`) — injects a short primer after the opening reasoning tag at inference time, nudging the model to reason before answering.
52
+ - **Response parsing** (`thinkpack.parse`) — splits raw model output into reasoning and answer components, with flags for truncation detection.
53
+
54
+ ---
55
+
56
+ ## Installation
57
+
58
+ ```bash
59
+ pip install thinkpack
60
+ ```
61
+
62
+ ---
63
+
64
+ ## Modules
65
+
66
+ ### `thinkpack.mask` — Training-time loss masking
67
+
68
+ When fine-tuning a reasoning model, naively training on all tokens can cause the model to learn to skip its reasoning block entirely. `mask()` formats your training records into a pretokenized HuggingFace dataset with selected parts of the sequence excluded from the loss.
69
+
70
+ ```python
71
+ import thinkpack
72
+
73
+ dataset = thinkpack.mask(
74
+ records=records, # list of dicts with "instruction" and "response" keys
75
+ tokenizer=tokenizer,
76
+ masked=thinkpack.Mask.THINK, # mask only the think block (default)
77
+ )
78
+ ```
79
+
80
+ The `masked` parameter is a composable flag — combine sections with `|`:
81
+
82
+ | Value | Effect |
83
+ |---|---|
84
+ | `Mask.THINK` | Think block hidden from loss; model trains on prompt + response |
85
+ | `Mask.PROMPT \| Mask.THINK` | Train on response only |
86
+ | `None` | No masking; all tokens contribute to the loss |
87
+
88
+ Model-specific template handling (Qwen3's native `reasoning_content` field, OLMo-3's auto-injected opening tag) is detected automatically from the tokenizer — no manual configuration needed.
89
+
90
+ See [examples/training.py](examples/training.py) for a complete training loop.
91
+
92
+ ---
93
+
94
+ ### `thinkpack.steer` — Inference-time thought steering
95
+
96
+ Think collapse can also be addressed at inference time by injecting a short prefix after the opening reasoning tag, seeding the model's reasoning before it generates its own thought content.
97
+
98
+ ```python
99
+ # ensure the opening reasoning tag is present without seeding the thought
100
+ steered_prompts = thinkpack.steer(
101
+ prompts=templated_prompts, # already chat-templated strings
102
+ tokenizer=tokenizer,
103
+ )
104
+
105
+ # seed the model's thought with a preset
106
+ steered_prompts = thinkpack.steer(
107
+ prompts=templated_prompts,
108
+ tokenizer=tokenizer,
109
+ prefix=thinkpack.SimplePrefix.CONCISE,
110
+ )
111
+
112
+ # or pass any custom string
113
+ steered_prompts = thinkpack.steer(
114
+ prompts=templated_prompts,
115
+ tokenizer=tokenizer,
116
+ prefix="Okay, this is a tricky one. Let me consider each part carefully.",
117
+ )
118
+ ```
119
+
120
+ `SimplePrefix` provides a few basic presets:
121
+
122
+ | Preset | Text |
123
+ |---|---|
124
+ | `BRIEF` | `"Okay, "` |
125
+ | `STEPS` | `"Okay, let me think this through step by step."` |
126
+ | `CONCISE` | `"Okay, let me think this through, but I need to be concise and make sure I also provide an answer."` |
127
+
128
+ `steer()` handles the PREFIXED template quirk automatically: models like OLMo-3 whose chat template already ends with an opening reasoning tag do not get a duplicate tag injected.
129
+
130
+ See [examples/inference.py](examples/inference.py) for a complete inference loop.
131
+
132
+ ---
133
+
134
+ ### `thinkpack.parse` — Response parsing
135
+
136
+ Parse raw model outputs into structured components — useful for evaluation, analysis, and hybrid decoding pipelines.
137
+
138
+ ```python
139
+ # single response
140
+ parsed = thinkpack.parse(response=raw_text)
141
+ parsed.answer # str — text after the closing reasoning tag
142
+ parsed.reasoning # str — content of the reasoning block
143
+ parsed.has_valid_reasoning # bool — non-empty, completed reasoning block
144
+ parsed.has_truncated_reasoning # bool — reasoning block started but never closed
145
+
146
+ # directly from vLLM output objects (single output → list, list of outputs → list[list])
147
+ parsed = thinkpack.parse_output(output=outputs)
148
+ ```
149
+
150
+ Handles all four output formats:
151
+
152
+ | Format | Example |
153
+ |---|---|
154
+ | Standard | `<think>reasoning</think>answer` |
155
+ | Prefixed template | `reasoning</think>answer` (opening tag injected by template) |
156
+ | Truncated standard | `<think>reasoning...` (no closing tag) |
157
+ | Truncated prefixed | `reasoning...` (pass `prefixed=True`) |
158
+
159
+ Recognises tag variants: `think`, `thinking`, `reasoning`, `thought` (case-insensitive).
160
+
161
+ ---
162
+
163
+ ### `thinkpack.distill` — Distillation prompt building and reasoning extraction
164
+
165
+ When training data lacks reasoning traces, `distill` helps construct them. It builds prompts that ask a teacher model to produce a reasoning trace given a question and its known answer, then extracts and writes those traces back into your records.
166
+
167
+ ```python
168
+ import thinkpack
169
+
170
+ # build prompts for a teacher model to generate reasoning traces
171
+ prompts = thinkpack.build_prompts(
172
+ records=records, # list of dicts with "instruction" and "response" keys
173
+ )
174
+
175
+ # after generating responses from the teacher model, extract the traces
176
+ traces = thinkpack.extract_reasoning(text=responses, tag="reasoning_trace")
177
+
178
+ # or write traces back into records in one step
179
+ records = thinkpack.update_records(
180
+ records=records,
181
+ responses=responses,
182
+ field="reasoning", # key to write extracted traces into
183
+ )
184
+ ```
185
+
186
+ `extract_reasoning` accepts a single string or a list, and returns `None` where extraction fails (blank or no tag found):
187
+
188
+ ```python
189
+ # single response — returns str | None
190
+ trace = thinkpack.extract_reasoning(text=response)
191
+
192
+ # list of responses — returns list[str | None]
193
+ traces = thinkpack.extract_reasoning(text=responses)
194
+ ```
195
+
196
+ ---
197
+
198
+ ### `thinkpack.hybrid` — Hybrid decoding
199
+
200
+ Hybrid decoding separates reasoning from answering across two model variants: the base model generates the reasoning block freely (without fine-tuning influence), and the fine-tuned adapter generates the final answer conditioned on that reasoning. This can improve answer quality when the adapter has partially collapsed.
201
+
202
+ Requires vLLM with `enable_lora=True`.
203
+
204
+ ```python
205
+ from thinkpack import hybrid_generate, SimplePrefix
206
+
207
+ # steered_prompts = prompts already ending with an open reasoning tag (from steer())
208
+ results = hybrid_generate(
209
+ prompts=steered_prompts,
210
+ llm=llm, # vLLM LLM loaded with enable_lora=True
211
+ lora_request=lora_request, # adapter used for phase 2
212
+ sampling_params=sampling_params,
213
+ )
214
+
215
+ for r in results:
216
+ r.reasoning # str — reasoning produced by the base model
217
+ r.answer # str — answer produced by the fine-tuned model
218
+ r.raw # str — full combined string for convenience
219
+ ```
220
+
221
+ ---
222
+
223
+ ## *development*
224
+
225
+ Clone the repository code:
226
+
227
+ ```shell
228
+ git clone https://github.com/itsluketwist/thinkpack.git
229
+ ```
230
+
231
+ We use [`uv`](https://astral.sh/blog/uv) for project management.
232
+ Once cloned, create a virtual environment and install the project with dev dependencies:
233
+
234
+ ```shell
235
+ python -m venv .venv
236
+
237
+ . .venv/bin/activate
238
+
239
+ pip install uv
240
+
241
+ uv sync
242
+ ```
243
+
244
+ Use `make` commands to lint and test:
245
+
246
+ ```shell
247
+ make lint
248
+
249
+ make test
250
+ ```
251
+
252
+ Use `uv` to add new dependencies into the project:
253
+
254
+ ```shell
255
+ uv add transformers
256
+ ```
257
+
258
+ Or to upgrade dependencies:
259
+
260
+ ```shell
261
+ uv sync --upgrade
262
+ ```
263
+
264
+ Check typings with `ty`:
265
+
266
+ ```shell
267
+ uv run --extra dev ty check src tests
268
+ ```
@@ -0,0 +1,13 @@
1
+ thinkpack/__init__.py,sha256=m1rj52BolZLTUE-pgPzU_uLDEgalGFMuyXieJE81VNk,818
2
+ thinkpack/_model.py,sha256=4gMvsCK4uYWlGM0Hr1OFO6X5Yp1EgzMQ2lCGDMYc2G4,4790
3
+ thinkpack/_tags.py,sha256=nApuOPnMyOhbtMy-zyxuJJL5ZamHMYsWqy3oElFYyPY,404
4
+ thinkpack/distill.py,sha256=7IQ784aX-sRF6M-FSSbtJmzxe2XP1yFxwVLRpLEFJa4,5285
5
+ thinkpack/hybrid.py,sha256=jivvmliUEPj9KsA1aTgmh0tNtHcJaRsX7yLoz3oFxM0,4511
6
+ thinkpack/mask.py,sha256=2naW16nclNBUHX1OCdStersraU-jJ1CwmSsF6lGAO50,8972
7
+ thinkpack/parse.py,sha256=14PVAkyqjglfGsbpklcqY0A4toZc5bJ5LuzvpNHpIwg,6314
8
+ thinkpack/steer.py,sha256=p1QdHcksiilMsvO87ruUjhIF0iStBNv1P8muqOhURU0,5247
9
+ thinkpack-0.0.2.dist-info/licenses/LICENSE,sha256=ywssDcJhpfEdT6kAZ2cLvnn79hg5D68z2Q6wfuiMkIo,1067
10
+ thinkpack-0.0.2.dist-info/METADATA,sha256=8xRFTU9_EMS-4Q_dhOfUFJsOzj6S-QDym8z2jvf7818,9553
11
+ thinkpack-0.0.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
12
+ thinkpack-0.0.2.dist-info/top_level.txt,sha256=GuOf1CxzlEiRGloTGSo1td4qHlXoUAdJZd3i7GfBITM,10
13
+ thinkpack-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Luke Twist
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ thinkpack