thinkpack 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
thinkpack/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """ThinkPack — tools for preventing think collapse in reasoning language models."""
2
+
3
+ from thinkpack._model import ModelInfo, TemplateStyle, detect_model
4
+ from thinkpack.distill import build_prompts, extract_reasoning, update_records
5
+ from thinkpack.hybrid import HybridResult, hybrid_generate
6
+ from thinkpack.mask import Mask, mask
7
+ from thinkpack.parse import ParsedResponse, parse, parse_all, parse_output
8
+ from thinkpack.steer import SimplePrefix, apply_steer_template, steer
9
+
10
+
11
+ __all__ = [
12
+ "ModelInfo",
13
+ "TemplateStyle",
14
+ "detect_model",
15
+ "build_prompts",
16
+ "extract_reasoning",
17
+ "update_records",
18
+ "HybridResult",
19
+ "hybrid_generate",
20
+ "Mask",
21
+ "mask",
22
+ "ParsedResponse",
23
+ "parse",
24
+ "parse_all",
25
+ "parse_output",
26
+ "SimplePrefix",
27
+ "apply_steer_template",
28
+ "steer",
29
+ ]
thinkpack/_model.py ADDED
@@ -0,0 +1,124 @@
1
+ """Model template style detection from tokenizer chat templates."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from enum import StrEnum
6
+
7
+
8
+ class TemplateStyle(StrEnum):
9
+ """
10
+ How a model's chat template handles reasoning blocks.
11
+
12
+ INLINE — standard: the model outputs <think>content</think> inline in its response.
13
+ No special template support; tags are injected and parsed as plain text.
14
+ NATIVE — the template has a dedicated reasoning_content field (e.g. Qwen3).
15
+ Reasoning is passed separately when building messages and rendered
16
+ inside the think block by the template itself.
17
+ PREFIXED — the template auto-injects an opening reasoning tag at the end of the
18
+ generation prompt. The model's decoded output begins mid-reasoning
19
+ (no opening tag visible), and always ends with a closing tag.
20
+ """
21
+
22
+ INLINE = "inline"
23
+ NATIVE = "native"
24
+ PREFIXED = "prefixed"
25
+
26
+
27
@dataclass
class ModelInfo:
    """Result of template-style detection for one model.

    Produced once by detect_model() and consumed internally by mask() and
    steer(), which use it to apply model-specific formatting without
    surfacing any flags to the caller.
    """

    # how this model's chat template handles reasoning blocks
    style: TemplateStyle
    # opening tag the model uses, e.g. "<think>", "<reasoning>", "<thought>"
    open_tag: str
38
+
39
+
40
# default opening tag used when the model has no known preference
_DEFAULT_OPEN_TAG = "<think>"

# sentinel injected into a test message to detect native reasoning_content support —
# if it appears in the rendered output, the template handles reasoning natively
_NATIVE_SENTINEL = "__thinkpack_detect__"

# matches any xml-like opening tag at the end of a string, e.g. <think>, <reasoning>
# (\s*$ tolerates trailing whitespace/newlines the template may emit after the tag)
_TRAILING_TAG = re.compile(r"<([a-zA-Z][a-zA-Z0-9_]*)>\s*$")

# cache keyed on the chat_template string — the template fully determines detection,
# and is stable for the lifetime of any real tokenizer instance
# NOTE(review): unbounded module-level cache; fine in practice since a process
# typically sees a handful of distinct templates
_cache: dict[str, ModelInfo] = {}
53
+
54
+
55
def detect_model(tokenizer: object) -> ModelInfo:
    """
    Detect how a tokenizer handles reasoning blocks from its chat template.

    Checks for native reasoning_content support (NATIVE), a generation prompt
    that auto-injects an opening reasoning tag (PREFIXED), or neither (INLINE).
    Detection is fully behaviour-based — no template source scanning: the
    template is exercised with probe messages and its output is inspected.

    Results are memoised per chat_template string, so repeated calls with the
    same tokenizer (or tokenizers sharing a template) are cheap.

    Returns a ModelInfo with the detected TemplateStyle and open_tag.
    """
    # an absent or None chat_template normalises to "" so it can key the cache
    template = getattr(tokenizer, "chat_template", "") or ""
    if cached := _cache.get(template):
        return cached

    # test for native reasoning_content support by rendering an assistant message
    # with a sentinel value — if the sentinel appears in output, the template
    # handles reasoning as a dedicated field rather than inline tags (e.g. Qwen3)
    try:
        out = tokenizer.apply_chat_template(  # type: ignore
            [
                {"role": "user", "content": ""},
                {
                    "role": "assistant",
                    "content": "",
                    "reasoning_content": _NATIVE_SENTINEL,
                },
            ],
            tokenize=False,
            add_generation_prompt=False,
        )
        # some tokenizers return token ids despite tokenize=False — decode them
        if isinstance(out, list):
            out = tokenizer.decode(out)  # type: ignore
        if _NATIVE_SENTINEL in out:
            # extract the actual tag the template wraps reasoning in, e.g. <think>:
            # look for the nearest opening tag preceding the sentinel
            tag_match = re.search(
                r"<([a-zA-Z][a-zA-Z0-9_]*)>[^<]*" + re.escape(_NATIVE_SENTINEL),
                out,
            )
            native_tag = f"<{tag_match.group(1)}>" if tag_match else _DEFAULT_OPEN_TAG
            result = ModelInfo(style=TemplateStyle.NATIVE, open_tag=native_tag)
            _cache[template] = result
            return result
    except Exception:
        pass  # template doesn't support this message structure — move on

    # apply with add_generation_prompt=True and check if any xml-like opening tag
    # was appended — if so, this is a PREFIXED model and we capture the tag name
    # NOTE(review): this second call is deliberately unguarded — presumably a
    # template that cannot render a plain user message should fail loudly rather
    # than be misclassified as INLINE; confirm that is the intended contract
    gen_prompt = tokenizer.apply_chat_template(  # type: ignore
        [{"role": "user", "content": ""}],
        tokenize=False,
        add_generation_prompt=True,
    )
    if isinstance(gen_prompt, list):
        # some tokenizers return token ids despite tokenize=False — decode them
        gen_prompt = tokenizer.decode(gen_prompt)  # type: ignore

    match = _TRAILING_TAG.search(gen_prompt)
    if match:
        result = ModelInfo(
            style=TemplateStyle.PREFIXED,
            open_tag=f"<{match.group(1)}>",
        )
    else:
        result = ModelInfo(
            style=TemplateStyle.INLINE,
            open_tag=_DEFAULT_OPEN_TAG,
        )

    _cache[template] = result
    return result
thinkpack/_tags.py ADDED
@@ -0,0 +1,16 @@
1
+ """Shared regex patterns for reasoning block tags."""
2
+
3
+ import re
4
+
5
+
6
+ # matches any opening reasoning tag, e.g. <think>, <thinking>, <reasoning>, <thought>
7
+ OPEN_TAG = re.compile(
8
+ r"<(think|thinking|reasoning|thought)>",
9
+ re.IGNORECASE,
10
+ )
11
+
12
+ # matches any closing reasoning tag, e.g. </think>, </thinking>, etc.
13
+ CLOSE_TAG = re.compile(
14
+ r"</(think|thinking|reasoning|thought)>",
15
+ re.IGNORECASE,
16
+ )
thinkpack/distill.py ADDED
@@ -0,0 +1,166 @@
1
+ """Distillation utilities for constructing reasoning prompts and extracting reasoning traces."""
2
+
3
+ import re
4
+ from typing import overload
5
+
6
+ from thinkpack.parse import parse
7
+
8
+
9
+ # default preamble used when none is provided — presents the task as a
10
+ # backwards explanation: given the answer, produce the reasoning that leads to it
11
+ _DEFAULT_PREAMBLE = (
12
+ "Given the following question and its correct answer, "
13
+ "produce a step-by-step reasoning trace that "
14
+ "explains how to arrive at the answer."
15
+ )
16
+
17
+
18
+ def build_prompts(
19
+ records: list[dict[str, str]],
20
+ instruction_key: str = "instruction",
21
+ response_key: str = "response",
22
+ tag: str = "reasoning_trace",
23
+ preamble: str = _DEFAULT_PREAMBLE,
24
+ example: str | None = None,
25
+ ) -> list[str]:
26
+ """
27
+ Build construct-mode distillation prompts from a list of records.
28
+
29
+ Each prompt presents the question and correct answer, asking the model
30
+ to produce a reasoning trace inside the specified tag. The closing tag
31
+ should be configured as a stop token so the model stops after reasoning.
32
+
33
+ Returns a list of prompt strings, one per record.
34
+ """
35
+ prompts = []
36
+ for record in records:
37
+ instruction = record[instruction_key]
38
+ response = record[response_key]
39
+
40
+ # build the example block only if one was provided
41
+ if example is not None:
42
+ example_block = f"Here is an example:\n<{tag}>\n{example}\n</{tag}>\n\n"
43
+ else:
44
+ example_block = ""
45
+
46
+ prompt = (
47
+ f"{preamble}\n\n"
48
+ f"Question: {instruction}\n\n"
49
+ f"Answer: {response}\n\n"
50
+ f"{example_block}"
51
+ f"Provide your reasoning inside <{tag}> tags."
52
+ )
53
+ prompts.append(prompt)
54
+
55
+ return prompts
56
+
57
+
58
+ @overload
59
+ def extract_reasoning(
60
+ text: str,
61
+ tag: str | None = ...,
62
+ prefixed: bool = ...,
63
+ strip_think: bool = ...,
64
+ ) -> str | None: ...
65
+
66
+
67
+ @overload
68
+ def extract_reasoning(
69
+ text: list[str],
70
+ tag: str | None = ...,
71
+ prefixed: bool = ...,
72
+ strip_think: bool = ...,
73
+ ) -> list[str | None]: ...
74
+
75
+
76
+ def extract_reasoning(
77
+ text: str | list[str],
78
+ tag: str | None = None,
79
+ prefixed: bool = False,
80
+ strip_think: bool = True,
81
+ ) -> str | None | list[str | None]:
82
+ """
83
+ Extract a reasoning trace from a model response or a list of responses.
84
+
85
+ Accepts a single string or a list; the return type matches the input.
86
+ Delegates to parse() for standard think/reasoning/thought tags, including
87
+ the truncated case where the closing tag is a stop token.
88
+
89
+ For custom tags (e.g. "reasoning_trace"), finds the opening tag and takes
90
+ everything after it — the closing tag is assumed to be a stop token and
91
+ absent from the output.
92
+
93
+ Returns the extracted reasoning string (or None if not found / blank) for
94
+ a single input, or a list of the same for a list input.
95
+ """
96
+ if isinstance(text, list):
97
+ return [
98
+ extract_reasoning( # type: ignore[misc]
99
+ text=t,
100
+ tag=tag,
101
+ prefixed=prefixed,
102
+ strip_think=strip_think,
103
+ )
104
+ for t in text
105
+ ]
106
+
107
+ if tag is None:
108
+ # delegate to parse() which handles all standard reasoning tags and
109
+ # the truncated case (open tag, no close tag = stop token scenario)
110
+ parsed = parse(response=text, prefixed=prefixed)
111
+ content = parsed.reasoning.strip()
112
+ return content if content else None
113
+
114
+ # custom tag mode: the closing tag is a stop token and never present,
115
+ # so find the opening tag and take everything after it
116
+ if strip_think:
117
+ # strip any standard think block first (its </think> is NOT a stop
118
+ # token in this mode, so it will appear in the output)
119
+ parsed = parse(response=text, prefixed=prefixed)
120
+ search_text = parsed.answer
121
+ else:
122
+ search_text = text
123
+
124
+ open_tag_re = re.compile(rf"<{re.escape(tag)}>", re.IGNORECASE)
125
+ match = open_tag_re.search(search_text)
126
+ if match is None:
127
+ return None
128
+
129
+ content = search_text[match.end() :].strip()
130
+ return content if content else None
131
+
132
+
133
def update_records(
    records: list[dict[str, str]],
    responses: list[str],
    field: str = "reasoning_constructed",
    tag: str | None = None,
    prefixed: bool = False,
    strip_think: bool = True,
) -> list[dict[str, str]]:
    """
    Add extracted reasoning traces into a list of records.

    Runs extract_reasoning over each response and stores the result in the
    matching record under `field`. Records whose extraction yields None are
    copied through unchanged. Input records are never mutated.

    Returns a new list of record dicts with the reasoning field added
    where extraction succeeded.
    """
    # single list-path call handles all responses at once
    traces: list[str | None] = extract_reasoning(
        text=responses,
        tag=tag,
        prefixed=prefixed,
        strip_think=strip_think,
    )

    # strict=True surfaces a records/responses length mismatch immediately
    updated: list[dict[str, str]] = []
    for record, trace in zip(records, traces, strict=True):
        copy = dict(record)
        if trace is not None:
            copy[field] = trace
        updated.append(copy)
    return updated
thinkpack/hybrid.py ADDED
@@ -0,0 +1,135 @@
1
+ """
2
+ Hybrid decoding: base model generates reasoning, fine-tuned adapter generates the answer.
3
+
4
+ Requires vLLM. The LLM must be loaded with enable_lora=True so the adapter
5
+ can be toggled between the two generation phases without reloading the model.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import Any, Protocol
10
+
11
+ from thinkpack.parse import parse
12
+
13
+
14
class _LLM(Protocol):
    """Minimal protocol for a vLLM-compatible LLM instance."""

    def generate(
        self,
        prompts: list[str],
        sampling_params: Any,
        lora_request: Any = None,
    ) -> list[Any]:
        """Generate one output object per prompt; lora_request=None runs the
        base model without any adapter (per vLLM's LLM.generate interface)."""
        ...
23
+
24
+
25
@dataclass
class HybridResult:
    """Result of one hybrid-decoding generation.

    Bundles the base model's reasoning, the fine-tuned model's answer, and
    a reconstructed combined string for callers that want a single text.
    """

    # content of the reasoning block, produced by the base model in phase 1
    reasoning: str
    # final answer text, produced by the fine-tuned model in phase 2
    answer: str
    # convenience reconstruction: "<tag>reasoning</tag>\nanswer"
    raw: str
39
+
40
+
41
+ def _build_reasoning_prefix(reasoning: str, tag: str) -> str:
42
+ """Wrap a reasoning string in its original tag to form a closed block.
43
+
44
+ Used to construct the phase 2 prompt prefix so the fine-tuned model
45
+ receives the full reasoning block before generating its answer.
46
+
47
+ Returns a string of the form "<tag>\\nreasoning\\n</tag>\\n".
48
+ """
49
+ return f"<{tag}>\n{reasoning}\n</{tag}>\n"
50
+
51
+
52
def hybrid_generate(
    prompts: list[str],
    llm: _LLM,
    lora_request: Any,
    sampling_params: Any,
    prefixed: bool = False,
) -> list[HybridResult]:
    """Run hybrid decoding: base model reasons, fine-tuned model answers.

    Two sequential generation passes over the same loaded vLLM model:

    Phase 1 — reasoning (no adapter):
        Generates with lora_request=None so the base model produces a
        reasoning block freely, without fine-tuning influence.

    Phase 2 — answer (with adapter):
        Prepends the reasoning block from phase 1 to each prompt, then
        generates with the provided lora_request so the fine-tuned model
        produces the final answer conditioned on that reasoning.

    The LLM must be loaded with enable_lora=True to support toggling the
    adapter between phases. Pass the same SamplingParams to both phases;
    for different per-phase params, use steer() and parse() directly.

    Set prefixed=True for PREFIXED template models (e.g. OLMo-3) whose
    generation prompt already ends with the opening reasoning tag — used
    when parsing the phase 1 outputs.

    Returns a list of HybridResult, one per prompt.

    Raises ValueError if the backend returns a different number of outputs
    than prompts (the strict zips below guard against silent misalignment).
    """
    # phase 1: base model generates reasoning (no adapter)
    phase1_outputs = llm.generate(
        prompts=prompts,
        sampling_params=sampling_params,
        lora_request=None,
    )

    # parse reasoning from each phase 1 output (first sample only)
    phase1_parsed = [
        parse(
            response=output.outputs[0].text,  # type: ignore[union-attr]
            prefixed=prefixed,
        )
        for output in phase1_outputs
    ]

    # build phase 2 prompts: original prompt + closed reasoning block.
    # use the detected tag from phase 1, falling back to "think" if not found.
    # strict=True (consistent with update_records) fails loudly if the backend
    # returned a different number of outputs than prompts.
    phase2_prompts = []
    for prompt, p1 in zip(prompts, phase1_parsed, strict=True):
        if p1.has_valid_reasoning:
            tag = p1.reasoning_tag or "think"
            prefix = _build_reasoning_prefix(reasoning=p1.reasoning, tag=tag)
            phase2_prompts.append(prompt + prefix)
        else:
            # no usable reasoning — phase 2 proceeds without a prefix
            phase2_prompts.append(prompt)

    # phase 2: fine-tuned model generates answer (with adapter)
    phase2_outputs = llm.generate(
        prompts=phase2_prompts,
        sampling_params=sampling_params,
        lora_request=lora_request,
    )

    # combine reasoning and answer into HybridResult objects
    results = []
    for p1, output in zip(phase1_parsed, phase2_outputs, strict=True):
        answer = output.outputs[0].text.strip()  # type: ignore[union-attr]
        tag = p1.reasoning_tag or "think"
        raw = (
            _build_reasoning_prefix(reasoning=p1.reasoning, tag=tag) + answer
            if p1.has_valid_reasoning
            else answer
        )
        results.append(
            HybridResult(
                reasoning=p1.reasoning,
                answer=answer,
                raw=raw,
            )
        )

    return results
thinkpack/mask.py ADDED
@@ -0,0 +1,257 @@
1
+ """Training-time loss masking for reasoning blocks."""
2
+
3
+ import re
4
+ from enum import IntFlag
5
+
6
+ from datasets import Dataset
7
+
8
+ from thinkpack._model import TemplateStyle, detect_model
9
+
10
+
11
# pytorch's CrossEntropyLoss uses ignore_index=-100 by default, and all major
# training frameworks (transformers Trainer, trl SFTTrainer, unsloth) inherit
# this default — so -100 is the correct value unless the trainer is configured
# otherwise. exposed as a parameter on mask() for the rare case where it differs.
_DEFAULT_IGNORE_INDEX = -100
16
+
17
+
18
class Mask(IntFlag):
    """
    Flag set naming which parts of a training sequence to exclude from the loss.

    Flags combine with the | operator:
        Mask.THINK                — mask only the think block (the common case)
        Mask.PROMPT | Mask.THINK  — train on the response alone

    PROMPT is the user instruction; THINK is the whole reasoning block,
    opening and closing tags included; RESPONSE is the model's answer.
    Masking RESPONSE leaves nothing useful to train on, but is permitted.
    """

    PROMPT = 1
    THINK = 2
    RESPONSE = 4
34
+
35
+
36
def _build_assistant_message(
    record: dict[str, str],
    style: TemplateStyle,
    open_tag: str,
) -> dict[str, str]:
    """
    Build the assistant message dict for a training record.

    NATIVE templates get reasoning as a separate reasoning_content field;
    INLINE and PREFIXED templates get it wrapped in inline reasoning tags.
    Whether the record carries a "reasoning" key (even an empty one) decides
    whether a think block appears at all — when masking, the block must be
    present so training context matches what the model sees at inference.

    Returns an assistant message dict ready for apply_chat_template.
    """
    # .get() returns None when the key is absent, distinguishing "no reasoning
    # key" from "reasoning key present but empty"
    reasoning_raw = record.get("reasoning")
    response = record["response"]

    message: dict[str, str] = {"role": "assistant"}

    if reasoning_raw is None:
        # no reasoning key at all — plain answer-only assistant turn
        message["content"] = response
        return message

    reasoning = reasoning_raw.strip()
    if style == TemplateStyle.NATIVE:
        # template renders reasoning itself via a dedicated field (e.g. Qwen3)
        message["content"] = response
        message["reasoning_content"] = reasoning
    else:
        # inline: derive the closing tag from the opening one, <think> -> </think>
        close_tag = open_tag.replace("<", "</", 1)
        message["content"] = f"{open_tag}\n{reasoning}\n{close_tag}\n{response}"

    return message
71
+
72
+
73
+ def _tokenize_prefix(
74
+ tokenizer: object,
75
+ text: str,
76
+ max_seq_length: int,
77
+ ) -> int:
78
+ """
79
+ Tokenize a text prefix and return its token count.
80
+
81
+ Used to locate section boundaries within the full token sequence by
82
+ tokenizing the text up to a known character position.
83
+
84
+ Returns the number of tokens in the prefix.
85
+ """
86
+ return len(
87
+ tokenizer.encode( # type: ignore
88
+ text,
89
+ add_special_tokens=False,
90
+ truncation=True,
91
+ max_length=max_seq_length,
92
+ )
93
+ )
94
+
95
+
96
def _tokenize_record(
    record: dict[str, str],
    tokenizer: object,
    style: TemplateStyle,
    open_tag: str,
    max_seq_length: int,
    masked: Mask,
    ignore_index: int,
) -> dict[str, list[int]]:
    """
    Tokenize a single training record and apply label masking.

    Locates the PROMPT / THINK / RESPONSE boundaries in the token sequence by
    tokenizing text prefixes (rather than using add_generation_prompt=True). This
    avoids a subtle issue with PREFIXED templates: the generation prompt already
    ends with <think>, so using it as a prefix boundary would leave the opening
    tag trainable while masking the closing tag — teaching the model to "open but
    never close" the reasoning block.

    Each section flagged in `masked` has its labels set to `ignore_index` so
    PyTorch's cross-entropy ignores those tokens during loss computation.

    Returns a dict with input_ids, labels, and attention_mask.
    """
    messages = [
        {"role": "user", "content": record["instruction"]},
        _build_assistant_message(
            record=record,
            style=style,
            open_tag=open_tag,
        ),
    ]
    full_text = tokenizer.apply_chat_template(  # type: ignore
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    input_ids = tokenizer.encode(  # type: ignore
        full_text,
        add_special_tokens=False,
        truncation=True,
        max_length=max_seq_length,
    )

    # default: all tokens contribute to the loss
    labels = list(input_ids)

    if not masked:
        # no sections to mask — return labels unchanged
        return {
            "input_ids": input_ids,
            "labels": labels,
            "attention_mask": [1] * len(input_ids),
        }

    # find the opening reasoning tag to locate the think block boundary
    # (plain substring search — the tag is a literal, no regex needed)
    open_pos = full_text.find(open_tag)
    think_start = (
        _tokenize_prefix(
            tokenizer=tokenizer,
            text=full_text[:open_pos],
            max_seq_length=max_seq_length,
        )
        if open_pos != -1
        else None  # no think block present in this record
    )

    # locate the response boundary (rfind to handle response text that also
    # appears inside the instruction). if the template transformed the response
    # so it no longer appears verbatim, rfind returns -1 — clamp to the end of
    # the sequence instead of silently slicing with a negative index, which
    # would put the boundary one character before the end.
    response_start_char = full_text.rfind(record["response"])
    if response_start_char == -1:
        response_start_char = len(full_text)
    response_start = _tokenize_prefix(
        tokenizer=tokenizer,
        text=full_text[:response_start_char],
        max_seq_length=max_seq_length,
    )

    # mask each requested section independently
    if Mask.PROMPT in masked:
        # mask everything from the start up to the think block (or response if no think)
        prompt_end = think_start if think_start is not None else response_start
        for i in range(prompt_end):
            labels[i] = ignore_index

    if Mask.THINK in masked and think_start is not None:
        # mask the full reasoning block including its opening and closing tags
        for i in range(think_start, response_start):
            labels[i] = ignore_index

    if Mask.RESPONSE in masked:
        # mask the response tokens through the end of the sequence
        for i in range(response_start, len(labels)):
            labels[i] = ignore_index

    return {
        "input_ids": input_ids,
        "labels": labels,
        "attention_mask": [1] * len(input_ids),
    }
193
+
194
+
195
def mask(
    records: list[dict[str, str]],
    tokenizer: object,
    masked: Mask | None = Mask.THINK,
    max_seq_length: int = 32768,
    ignore_index: int = _DEFAULT_IGNORE_INDEX,
    tag: str | None = None,
) -> Dataset:
    """
    Format training records into a pretokenized dataset with selected sections masked.

    Records need "instruction" and "response" keys; an optional "reasoning" key
    supplies think-block content. When masking is active and a record lacks that
    key, an empty reasoning block is injected so training context matches what
    the model sees at inference time.

    Template style (INLINE, NATIVE, PREFIXED) is detected automatically from
    the tokenizer. Combine Mask flags with | to mask several sections at once;
    masked=None trains on every token.

    Returns a HuggingFace Dataset with input_ids, labels, and attention_mask columns.
    """
    model_info = detect_model(tokenizer=tokenizer)

    # an explicit tag overrides the detected default — useful for INLINE models
    # whose tag differs from <think>, e.g. <reasoning>
    open_tag = model_info.open_tag if tag is None else f"<{tag}>"

    # treat None as "mask nothing" so the rest of the pipeline sees one type
    effective_masked = Mask(0) if masked is None else masked

    # inject an empty "reasoning" key wherever one is missing while masking is
    # active, so the think block always appears in the sequence — required for
    # training/inference context alignment on PREFIXED models, which always
    # emit think blocks at inference time
    if effective_masked:
        records = [
            record if "reasoning" in record else {**record, "reasoning": ""}
            for record in records
        ]

    tokenized = [
        _tokenize_record(
            record=record,
            tokenizer=tokenizer,
            style=model_info.style,
            open_tag=open_tag,
            max_seq_length=max_seq_length,
            masked=effective_masked,
            ignore_index=ignore_index,
        )
        for record in records
    ]

    # transpose the per-record dicts into per-column lists for Dataset.from_dict
    columns = ("input_ids", "labels", "attention_mask")
    return Dataset.from_dict(
        {column: [row[column] for row in tokenized] for column in columns}
    )
thinkpack/parse.py ADDED
@@ -0,0 +1,173 @@
1
+ """Parsing of model responses into reasoning and answer components."""
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import cast
6
+
7
+ from thinkpack._tags import CLOSE_TAG as _REASONING_CLOSE_TAG
8
+ from thinkpack._tags import OPEN_TAG as _REASONING_OPEN_TAG
9
+
10
+
11
+ @dataclass
12
+ class ParsedResponse:
13
+ """
14
+ A model response split into reasoning and answer components.
15
+
16
+ answer and reasoning contain the extracted text; the boolean flags
17
+ describe the structure of the reasoning block at a glance.
18
+ """
19
+
20
+ # the model's final answer — text after the closing reasoning tag, or the
21
+ # full response if no reasoning block was found
22
+ answer: str
23
+
24
+ # content inside the reasoning block; empty string if no block was present
25
+ reasoning: str
26
+
27
+ # lowercase tag name used, e.g. "think", "reasoning" (None if no tag found)
28
+ reasoning_tag: str | None
29
+
30
+ # true if any reasoning block structure is present, even if blank or truncated
31
+ has_reasoning_block: bool
32
+
33
+ # true if the reasoning block was completed and non-blank
34
+ has_valid_reasoning: bool
35
+
36
+ # true if an opening tag was found but the model never produced a closing tag
37
+ has_truncated_reasoning: bool
38
+
39
+
40
def parse(
    response: str,
    prefixed: bool = False,
    tag: str | None = None,
) -> ParsedResponse:
    """Split a single model response into reasoning and answer components.

    Four shapes are recognised:
      - standard:           <think>content</think>answer
      - prefixed:           content</think>answer (open tag injected by template)
      - truncated standard: <think>content... (open tag, never closed)
      - truncated prefixed: content... (no tags; detectable only with prefixed=True)

    Returns a ParsedResponse carrying the split text and status flags.
    """
    # an explicit tag narrows matching to that one name; otherwise use the
    # shared patterns covering every known reasoning tag variant
    if tag is None:
        open_re = _REASONING_OPEN_TAG
        close_re = _REASONING_CLOSE_TAG
    else:
        escaped = re.escape(tag)
        open_re = re.compile(rf"<({escaped})>", re.IGNORECASE)
        close_re = re.compile(rf"</({escaped})>", re.IGNORECASE)

    closed = close_re.search(response)
    if closed is not None:
        # complete block: take everything before the close tag, removing one
        # leading open tag if present (prefixed outputs carry none)
        body = open_re.sub("", response[: closed.start()], count=1)
        return ParsedResponse(
            answer=response[closed.end() :].strip(),
            reasoning=body,
            reasoning_tag=closed.group(1).lower(),
            has_reasoning_block=True,
            has_valid_reasoning=bool(body.strip()),
            has_truncated_reasoning=False,
        )

    opened = open_re.search(response)
    if opened is not None:
        # reasoning started but generation stopped before the close tag
        return ParsedResponse(
            answer="",
            reasoning=response[opened.end() :],
            reasoning_tag=opened.group(1).lower(),
            has_reasoning_block=True,
            has_valid_reasoning=False,
            has_truncated_reasoning=True,
        )

    if prefixed:
        # PREFIXED templates inject the opening tag before decoding begins, so
        # a missing close tag here means truncated reasoning — not "no block"
        return ParsedResponse(
            answer="",
            reasoning=response,
            reasoning_tag=None,
            has_reasoning_block=True,
            has_valid_reasoning=False,
            has_truncated_reasoning=True,
        )

    # no reasoning tags anywhere — a plain response with no think block
    return ParsedResponse(
        answer=response,
        reasoning="",
        reasoning_tag=None,
        has_reasoning_block=False,
        has_valid_reasoning=False,
        has_truncated_reasoning=False,
    )
119
+
120
+
121
def parse_all(
    responses: list[list[str]],
    prefixed: bool = False,
    tag: str | None = None,
) -> list[list[ParsedResponse]]:
    """Parse a batch of model responses into ParsedResponse objects.

    Input is a nested list of shape [task][sample]; output mirrors that
    shape exactly. Pass tag to restrict matching to one tag name (see
    parse() for details).
    """
    parsed: list[list[ParsedResponse]] = []
    for sample_responses in responses:
        parsed.append(
            [parse(response=r, prefixed=prefixed, tag=tag) for r in sample_responses]
        )
    return parsed
137
+
138
+
139
def parse_output(
    output: object | list[object],
    prefixed: bool = False,
    tag: str | None = None,
) -> list[ParsedResponse] | list[list[ParsedResponse]]:
    """Parse one or more generation output objects into ParsedResponse objects.

    Two accepted inputs:
      - a single object exposing .outputs (e.g. a vLLM RequestOutput) —
        yields a flat list with one ParsedResponse per completion;
      - a list of such objects — yields a nested [task][sample] structure.

    Each completion is expected to expose a .text string attribute,
    matching vLLM's RequestOutput and similar interfaces. Pass tag to
    restrict matching to one tag name (see parse() for details).
    """
    if isinstance(output, list):
        # recurse per output object to build the [task][sample] nesting
        nested = [
            parse_output(output=item, prefixed=prefixed, tag=tag) for item in output
        ]
        return cast(list[list[ParsedResponse]], nested)

    # single output object — one ParsedResponse per completion in .outputs
    return [
        parse(response=completion.text, prefixed=prefixed, tag=tag)
        for completion in output.outputs  # type: ignore
    ]
thinkpack/steer.py ADDED
@@ -0,0 +1,138 @@
1
+ """Inference-time thought-steering prefix injection."""
2
+
3
+ from enum import StrEnum
4
+
5
+ from thinkpack._model import detect_model
6
+
7
+
8
+ class SimplePrefix(StrEnum):
9
+ """
10
+ A small set of basic steering prefixes for common use cases.
11
+
12
+ These are provided as convenient starting points — pass any string to
13
+ steer() to use a custom prefix instead.
14
+ """
15
+
16
+ # minimal opening; lets the model continue naturally with a slight nudge
17
+ BRIEF = "Okay, "
18
+ # explicit step-by-step framing
19
+ STEPS = "Okay, let me think this through step by step."
20
+ # step-by-step framing with a reminder to stay concise and produce an answer
21
+ CONCISE = (
22
+ "Okay, let me think this through, "
23
+ "but I need to be concise and make sure I also provide an answer."
24
+ )
25
+
26
+
27
def steer(
    prompts: list[str],
    tokenizer: object,
    prefix: SimplePrefix | str | None = None,
    tag: str | None = None,
    close: bool = False,
) -> list[str]:
    """Inject a thought-steering prefix into chat-templated prompt strings.

    Ensures each prompt ends with an open reasoning block, optionally seeded
    with a prefix string to guide the model's thinking. Use SimplePrefix for
    common presets, or pass any string for a custom prefix. Template style
    (INLINE, NATIVE, PREFIXED) is detected automatically from the tokenizer.

    The prompts should already be chat-templated strings (e.g. as returned by
    tokenizer.apply_chat_template with add_generation_prompt=True).

    When close=True, the reasoning block is closed after the prefix, producing
    a complete <think>...</think> block. The model then generates its response
    after the closed block. This is useful as a universal interface for injecting
    a fixed reasoning block rather than steering an open-ended thought.

    Returns a list of prompt strings ready to pass directly to a generation function.
    """
    model_info = detect_model(tokenizer=tokenizer)
    # a user-supplied tag overrides the detected default (useful for INLINE
    # models whose tag differs from <think>, e.g. <reasoning>)
    open_tag = model_info.open_tag if tag is None else f"<{tag}>"
    # derive the closing tag by inserting "/" after the opening "<"
    close_tag = open_tag.replace("<", "</", 1)

    results: list[str] = []
    for prompt in prompts:
        # a PREFIXED template may already end with the open tag (possibly
        # followed by trailing newlines) — don't inject a duplicate
        has_open_tag = prompt.rstrip("\n").endswith(open_tag)

        if prefix is None:
            # no seeded thought: ensure an open tag, optionally close it empty
            suffix = "" if has_open_tag else open_tag + "\n"
            if close:
                suffix += close_tag + "\n"
        else:
            # seed the thought body; the tag (or a newline after an existing
            # tag) separates it from the templated prompt
            suffix = ("\n" if has_open_tag else open_tag + "\n") + str(prefix)
            if close:
                suffix += "\n" + close_tag + "\n"

        results.append(prompt + suffix)

    return results
101
+
102
+
103
def apply_steer_template(
    conversations: list[list[dict[str, str]]],
    tokenizer: object,
    prefix: SimplePrefix | str | None = None,
    tag: str | None = None,
    close: bool = False,
) -> list[str]:
    """Apply the chat template and inject a thought-steering prefix in one step.

    Convenience wrapper that combines tokenizer.apply_chat_template() and steer()
    into a single call. Accepts a list of conversations (each a list of message dicts
    with "role" and "content" keys) and returns steered prompt strings ready for
    generation. Pass close=True to produce a complete closed reasoning block.

    Returns a list of steered prompt strings, one per conversation.
    """

    def _render(messages: list[dict[str, str]]) -> str:
        # template a single conversation into a prompt string
        rendered = tokenizer.apply_chat_template(  # type: ignore
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        if isinstance(rendered, list):
            # some tokenizers return token ids despite tokenize=False — decode them
            rendered = tokenizer.decode(rendered)  # type: ignore
        return rendered

    return steer(
        prompts=[_render(messages) for messages in conversations],
        tokenizer=tokenizer,
        prefix=prefix,
        tag=tag,
        close=close,
    )
@@ -0,0 +1,268 @@
1
+ Metadata-Version: 2.4
2
+ Name: thinkpack
3
+ Version: 0.0.2
4
+ Summary: Tools for preventing think collapse in reasoning language models.
5
+ Author-email: Lukas Twist <itsluketwist@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2023 Luke Twist
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/itsluketwist/thinkpack
29
+ Keywords: thinkpack,llm,reasoning,think-collapse,fine-tuning
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Programming Language :: Python
32
+ Classifier: Programming Language :: Python :: 3
33
+ Requires-Python: >=3.11
34
+ Description-Content-Type: text/markdown
35
+ License-File: LICENSE
36
+ Requires-Dist: datasets
37
+ Requires-Dist: transformers
38
+ Dynamic: license-file
39
+
40
+ # ThinkPack
41
+
42
+ ![ThinkPack](assets/banner.png)
43
+
44
+ A lightweight toolkit for working with reasoning blocks in language models — preventing think collapse via loss masking, steering reasoning at inference time, and parsing model outputs.
45
+
46
+ **Think collapse** is a failure mode where reasoning models stop using their `<think>...</think>` blocks during or after fine-tuning.
47
+ Without intervention, the model learns to skip reasoning entirely — producing answers directly and losing the chain-of-thought behaviour it was trained on.
48
+ ThinkPack provides three targeted tools to prevent this:
49
+
50
+ - **Loss masking** (`thinkpack.mask`) — keeps reasoning blocks in the training context while masking them from the loss, so the model doesn't learn to skip them.
51
+ - **Thought steering** (`thinkpack.steer`) — injects a short primer after the opening reasoning tag at inference time, nudging the model to reason before answering.
52
+ - **Response parsing** (`thinkpack.parse`) — splits raw model output into reasoning and answer components, with flags for truncation detection.
53
+
54
+ ---
55
+
56
+ ## Installation
57
+
58
+ ```bash
59
+ pip install thinkpack
60
+ ```
61
+
62
+ ---
63
+
64
+ ## Modules
65
+
66
+ ### `thinkpack.mask` — Training-time loss masking
67
+
68
+ When fine-tuning a reasoning model, naively training on all tokens can cause the model to learn to skip its reasoning block entirely. `mask()` formats your training records into a pretokenized HuggingFace dataset with selected parts of the sequence excluded from the loss.
69
+
70
+ ```python
71
+ import thinkpack
72
+
73
+ dataset = thinkpack.mask(
74
+ records=records, # list of dicts with "instruction" and "response" keys
75
+ tokenizer=tokenizer,
76
+ masked=thinkpack.Mask.THINK, # mask only the think block (default)
77
+ )
78
+ ```
79
+
80
+ The `masked` parameter is a composable flag — combine sections with `|`:
81
+
82
+ | Value | Effect |
83
+ |---|---|
84
+ | `Mask.THINK` | Think block hidden from loss; model trains on prompt + response |
85
+ | `Mask.PROMPT \| Mask.THINK` | Train on response only |
86
+ | `None` | No masking; all tokens contribute to the loss |
87
+
88
+ Model-specific template handling (Qwen3's native `reasoning_content` field, OLMo-3's auto-injected opening tag) is detected automatically from the tokenizer — no manual configuration needed.
89
+
90
+ See [examples/training.py](examples/training.py) for a complete training loop.
91
+
92
+ ---
93
+
94
+ ### `thinkpack.steer` — Inference-time thought steering
95
+
96
+ Think collapse can also be addressed at inference time by injecting a short prefix after the opening reasoning tag, seeding the model's reasoning before it generates its own thought content.
97
+
98
+ ```python
99
+ # ensure the opening reasoning tag is present without seeding the thought
100
+ steered_prompts = thinkpack.steer(
101
+ prompts=templated_prompts, # already chat-templated strings
102
+ tokenizer=tokenizer,
103
+ )
104
+
105
+ # seed the model's thought with a preset
106
+ steered_prompts = thinkpack.steer(
107
+ prompts=templated_prompts,
108
+ tokenizer=tokenizer,
109
+ prefix=thinkpack.SimplePrefix.CONCISE,
110
+ )
111
+
112
+ # or pass any custom string
113
+ steered_prompts = thinkpack.steer(
114
+ prompts=templated_prompts,
115
+ tokenizer=tokenizer,
116
+ prefix="Okay, this is a tricky one. Let me consider each part carefully.",
117
+ )
118
+ ```
119
+
120
+ `SimplePrefix` provides a few basic presets:
121
+
122
+ | Preset | Text |
123
+ |---|---|
124
+ | `BRIEF` | `"Okay, "` |
125
+ | `STEPS` | `"Okay, let me think this through step by step."` |
126
+ | `CONCISE` | `"Okay, let me think this through, but I need to be concise and make sure I also provide an answer."` |
127
+
128
+ `steer()` handles the PREFIXED template quirk automatically: models like OLMo-3 whose chat template already ends with an opening reasoning tag do not get a duplicate tag injected.
129
+
130
+ See [examples/inference.py](examples/inference.py) for a complete inference loop.
131
+
132
+ ---
133
+
134
+ ### `thinkpack.parse` — Response parsing
135
+
136
+ Parse raw model outputs into structured components — useful for evaluation, analysis, and hybrid decoding pipelines.
137
+
138
+ ```python
139
+ # single response
140
+ parsed = thinkpack.parse(response=raw_text)
141
+ parsed.answer # str — text after the closing reasoning tag
142
+ parsed.reasoning # str — content of the reasoning block
143
+ parsed.has_valid_reasoning # bool — non-empty, completed reasoning block
144
+ parsed.has_truncated_reasoning # bool — reasoning block started but never closed
145
+
146
+ # directly from vLLM output objects (single output → list, list of outputs → list[list])
147
+ parsed = thinkpack.parse_output(output=outputs)
148
+ ```
149
+
150
+ Handles all four output formats:
151
+
152
+ | Format | Example |
153
+ |---|---|
154
+ | Standard | `<think>reasoning</think>answer` |
155
+ | Prefixed template | `reasoning</think>answer` (opening tag injected by template) |
156
+ | Truncated standard | `<think>reasoning...` (no closing tag) |
157
+ | Truncated prefixed | `reasoning...` (pass `prefixed=True`) |
158
+
159
+ Recognises tag variants: `think`, `thinking`, `reasoning`, `thought` (case-insensitive).
160
+
161
+ ---
162
+
163
+ ### `thinkpack.distill` — Distillation prompt building and reasoning extraction
164
+
165
+ When training data lacks reasoning traces, `distill` helps construct them. It builds prompts that ask a teacher model to produce a reasoning trace given a question and its known answer, then extracts and writes those traces back into your records.
166
+
167
+ ```python
168
+ import thinkpack
169
+
170
+ # build prompts for a teacher model to generate reasoning traces
171
+ prompts = thinkpack.build_prompts(
172
+ records=records, # list of dicts with "instruction" and "response" keys
173
+ )
174
+
175
+ # after generating responses from the teacher model, extract the traces
176
+ traces = thinkpack.extract_reasoning(text=responses, tag="reasoning_trace")
177
+
178
+ # or write traces back into records in one step
179
+ records = thinkpack.update_records(
180
+ records=records,
181
+ responses=responses,
182
+ field="reasoning", # key to write extracted traces into
183
+ )
184
+ ```
185
+
186
+ `extract_reasoning` accepts a single string or a list, and returns `None` where extraction fails (blank or no tag found):
187
+
188
+ ```python
189
+ # single response — returns str | None
190
+ trace = thinkpack.extract_reasoning(text=response)
191
+
192
+ # list of responses — returns list[str | None]
193
+ traces = thinkpack.extract_reasoning(text=responses)
194
+ ```
195
+
196
+ ---
197
+
198
+ ### `thinkpack.hybrid` — Hybrid decoding
199
+
200
+ Hybrid decoding separates reasoning from answering across two model variants: the base model generates the reasoning block freely (without fine-tuning influence), and the fine-tuned adapter generates the final answer conditioned on that reasoning. This can improve answer quality when the adapter has partially collapsed.
201
+
202
+ Requires vLLM with `enable_lora=True`.
203
+
204
+ ```python
205
+ from thinkpack import hybrid_generate, SimplePrefix
206
+
207
+ # steered_prompts = prompts already ending with an open reasoning tag (from steer())
208
+ results = hybrid_generate(
209
+ prompts=steered_prompts,
210
+ llm=llm, # vLLM LLM loaded with enable_lora=True
211
+ lora_request=lora_request, # adapter used for phase 2
212
+ sampling_params=sampling_params,
213
+ )
214
+
215
+ for r in results:
216
+ r.reasoning # str — reasoning produced by the base model
217
+ r.answer # str — answer produced by the fine-tuned model
218
+ r.raw # str — full combined string for convenience
219
+ ```
220
+
221
+ ---
222
+
223
+ ## *development*
224
+
225
+ Clone the repository code:
226
+
227
+ ```shell
228
+ git clone https://github.com/itsluketwist/thinkpack.git
229
+ ```
230
+
231
+ We use [`uv`](https://astral.sh/blog/uv) for project management.
232
+ Once cloned, create a virtual environment and install the project with dev dependencies:
233
+
234
+ ```shell
235
+ python -m venv .venv
236
+
237
+ . .venv/bin/activate
238
+
239
+ pip install uv
240
+
241
+ uv sync
242
+ ```
243
+
244
+ Use `make` commands to lint and test:
245
+
246
+ ```shell
247
+ make lint
248
+
249
+ make test
250
+ ```
251
+
252
+ Use `uv` to add new dependencies into the project:
253
+
254
+ ```shell
255
+ uv add transformers
256
+ ```
257
+
258
+ Or to upgrade dependencies:
259
+
260
+ ```shell
261
+ uv sync --upgrade
262
+ ```
263
+
264
+ Check typings with `ty`:
265
+
266
+ ```shell
267
+ uv run --extra dev ty check src tests
268
+ ```
@@ -0,0 +1,13 @@
1
+ thinkpack/__init__.py,sha256=m1rj52BolZLTUE-pgPzU_uLDEgalGFMuyXieJE81VNk,818
2
+ thinkpack/_model.py,sha256=4gMvsCK4uYWlGM0Hr1OFO6X5Yp1EgzMQ2lCGDMYc2G4,4790
3
+ thinkpack/_tags.py,sha256=nApuOPnMyOhbtMy-zyxuJJL5ZamHMYsWqy3oElFYyPY,404
4
+ thinkpack/distill.py,sha256=7IQ784aX-sRF6M-FSSbtJmzxe2XP1yFxwVLRpLEFJa4,5285
5
+ thinkpack/hybrid.py,sha256=jivvmliUEPj9KsA1aTgmh0tNtHcJaRsX7yLoz3oFxM0,4511
6
+ thinkpack/mask.py,sha256=2naW16nclNBUHX1OCdStersraU-jJ1CwmSsF6lGAO50,8972
7
+ thinkpack/parse.py,sha256=14PVAkyqjglfGsbpklcqY0A4toZc5bJ5LuzvpNHpIwg,6314
8
+ thinkpack/steer.py,sha256=p1QdHcksiilMsvO87ruUjhIF0iStBNv1P8muqOhURU0,5247
9
+ thinkpack-0.0.2.dist-info/licenses/LICENSE,sha256=ywssDcJhpfEdT6kAZ2cLvnn79hg5D68z2Q6wfuiMkIo,1067
10
+ thinkpack-0.0.2.dist-info/METADATA,sha256=8xRFTU9_EMS-4Q_dhOfUFJsOzj6S-QDym8z2jvf7818,9553
11
+ thinkpack-0.0.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
12
+ thinkpack-0.0.2.dist-info/top_level.txt,sha256=GuOf1CxzlEiRGloTGSo1td4qHlXoUAdJZd3i7GfBITM,10
13
+ thinkpack-0.0.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Luke Twist
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ thinkpack