PyPI - tokenbreak-scanner - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tokenbreak-scanner 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

tokenbreak_scanner/__init__.py +7 -0
tokenbreak_scanner/cli.py +203 -0
tokenbreak_scanner/inspector.py +294 -0
tokenbreak_scanner/models.py +67 -0
tokenbreak_scanner/tokenizers.py +388 -0
tokenbreak_scanner/validator.py +361 -0
tokenbreak_scanner-0.1.0.dist-info/METADATA +324 -0
tokenbreak_scanner-0.1.0.dist-info/RECORD +12 -0
tokenbreak_scanner-0.1.0.dist-info/WHEEL +5 -0
tokenbreak_scanner-0.1.0.dist-info/entry_points.txt +2 -0
tokenbreak_scanner-0.1.0.dist-info/licenses/LICENSE +33 -0
tokenbreak_scanner-0.1.0.dist-info/top_level.txt +1 -0

tokenbreak_scanner/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""TokenBreak Model File Scanner.
+Audit NLP model artifacts for TokenBreak vulnerabilities by inspecting
+tokenizer configurations and model architectures.
+"""
+__version__ = "0.1.0"

tokenbreak_scanner/cli.py ADDED Viewed

@@ -0,0 +1,203 @@
+"""CLI entrypoint for the TokenBreak model scanner."""
+from __future__ import annotations
+import json
+import sys
+import click
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+from rich.text import Text
+from .inspector import inspect_model
+from .models import RiskLevel, ScannerReport
+from .validator import AttackValidationResult, validate_attack
+console = Console(stderr=True)
+def _build_table(report: ScannerReport) -> Table:
+    """Build a Rich table for a scanner report.
+    Cell values that originate from the scanned model (names, paths, signal
+    labels, reasons, recommendation) are wrapped in ``Text`` so Rich prints
+    them verbatim.  Passed as plain strings they would be parsed as console
+    markup: a bracketed label such as ``[tokenizer.json model.type]`` is
+    consumed as a style tag and disappears from the evidence tree, and a value
+    containing ``[/...]`` can raise ``MarkupError``.
+    Parameters
+    ----------
+    report
+        Scan result to render.
+    Returns
+    -------
+    Table
+        Header-less two-column table (field name, value), followed by the
+        evidence rows when ``report.detection_sources`` is non-empty.
+    """
+    table = Table(title=f"TokenBreak Scan Report: {report.model_name}", show_header=False)
+    table.add_column("Field", style="cyan", no_wrap=True)
+    table.add_column("Value", style="white")
+    # Risk level with color
+    risk_color = {
+        RiskLevel.LOW: "green",
+        RiskLevel.HIGH: "red",
+        RiskLevel.UNKNOWN: "yellow",
+    }.get(report.risk_level, "white")
+    table.add_row("Model Name", Text(report.model_name))
+    table.add_row("Model Type", Text(report.model_type))
+    table.add_row("Model Family", Text(report.model_family))
+    table.add_row("Tokenizer Class", Text(report.tokenizer_class))
+    table.add_row("Tokenizer Algorithm", report.tokenizer_algorithm.value)
+    table.add_row("Vocab Size", str(report.vocab_size) if report.vocab_size else "N/A")
+    table.add_row("Confidence Score", f"{report.confidence_score:.2f}")
+    table.add_row(
+        "Vulnerable to TokenBreak",
+        Text("YES", style="bold red") if report.vulnerable_to_tokenbreak else Text("NO", style="bold green"),
+    )
+    table.add_row("Risk Level", Text(report.risk_level.value, style=f"bold {risk_color}"))
+    table.add_row("Source", Text(report.source))
+    table.add_row("Recommendation", Text(report.recommendation))
+    # Evidence tree
+    if report.detection_sources:
+        table.add_row("", "")
+        table.add_row("Detection Sources", Text("(evidence tree)", style="dim"))
+        for i, src in enumerate(report.detection_sources, 1):
+            # Text() keeps the literal square brackets around the signal name.
+            bullet = Text(f"  {i}. [{src.signal}]")
+            detail = f"inferred={src.inferred or 'N/A'}, weight={src.weight:.2f}"
+            if src.reason:
+                detail += f" — {src.reason}"
+            table.add_row(bullet, Text(detail))
+    return table
+def _print_json(report: ScannerReport, attack_result: AttackValidationResult | None = None) -> None:
+    """Write the report to ``click.echo`` as indented JSON.
+    When ``attack_result`` is supplied, its JSON form is embedded in the
+    payload under the ``"attack_validation"`` key.
+    """
+    payload = report.model_dump(mode="json")
+    if attack_result is not None:
+        payload.update(attack_validation=attack_result.model_dump(mode="json"))
+    rendered = json.dumps(payload, indent=2)
+    click.echo(rendered)
+def _print_table(report: ScannerReport, attack_result: AttackValidationResult | None = None) -> None:
+    """Print report as a Rich table, plus a live-attack panel when available.
+    The panel body is assembled as a ``Text`` object rather than a markup
+    string, so labels and the manipulated text produced by the attack are
+    printed verbatim even if they contain square brackets.  Spacer lines are
+    emitted through the same ``console`` as the table and panel so that all
+    table-mode output lands on one stream.
+    Parameters
+    ----------
+    report
+        Scan result to render.
+    attack_result
+        Outcome of the optional live attack validation; omitted when None.
+    """
+    console.print()
+    console.print(_build_table(report))
+    if attack_result is not None:
+        console.print()
+        bypassed = attack_result.success
+        if bypassed:
+            verdict, colour = "VULNERABLE", "red"
+        else:
+            verdict, colour = "NOT VULNERABLE", "green"
+        body = Text()
+        body.append(f"Attack Validation: {verdict}", style=f"bold {colour}")
+        body.append(
+            f"\nOriginal text classified as: {attack_result.original_label} "
+            f"(confidence: {attack_result.original_confidence:.4f})\n"
+        )
+        if bypassed:
+            body.append(
+                f"Manipulated text: {attack_result.manipulated_text}\n"
+                f"Manipulated text classified as: {attack_result.manipulated_label} "
+                f"(confidence: {attack_result.manipulated_confidence:.4f})\n"
+                "Bypass successful: TokenBreak evades detection."
+            )
+        else:
+            body.append(
+                f"Manipulated text: {attack_result.manipulated_text or 'N/A'}\n"
+                "TokenBreak did not produce a successful bypass."
+            )
+        console.print(Panel(body, title="Live Attack Test", border_style=colour))
+    console.print()
+@click.command(name="tokenbreak-scan")
+@click.argument("source")
+@click.option(
+    "--output",
+    "output_format",
+    type=click.Choice(["json", "table"], case_sensitive=False),
+    default="table",
+    show_default=True,
+    help="Output format for the report.",
+)
+@click.option(
+    "--download",
+    is_flag=True,
+    default=False,
+    help="Download model files from HuggingFace if source is a model ID.",
+)
+@click.option(
+    "--trust-remote-code",
+    is_flag=True,
+    default=False,
+    help="Trust remote code when loading tokenizers.",
+)
+@click.option(
+    "--test-attack",
+    is_flag=True,
+    default=False,
+    help="Run a live TokenBreak attack validation against the model. "
+         "Requires model weights and a classification head.",
+)
+@click.option(
+    "--threshold",
+    type=float,
+    default=0.995,
+    show_default=True,
+    help="Confidence threshold for TokenBreak attack validation.",
+)
+@click.version_option(version="0.1.0")
+def main(
+    source: str,
+    output_format: str,
+    download: bool,
+    trust_remote_code: bool,
+    test_attack: bool,
+    threshold: float,
+) -> None:
+    """Scan SOURCE for TokenBreak tokenizer vulnerabilities.
+    SOURCE can be a local directory containing model files
+    (config.json, tokenizer.json, etc.) or a HuggingFace model ID.
+    """
+    # Exit status: 2 when inspection fails, 1 when the risk level is High,
+    # 0 otherwise.  A failing --test-attack run only produces a warning.
+    try:
+        report = inspect_model(
+            source,
+            download=download,
+            trust_remote_code=trust_remote_code,
+        )
+    except FileNotFoundError as exc:
+        # Text.assemble keeps the exception message out of Rich's markup
+        # parser, so brackets in a path or model ID are printed verbatim.
+        console.print(Text.assemble(("Error:", "bold red"), f" {exc}"))
+        sys.exit(2)
+    except Exception as exc:
+        console.print(
+            Text.assemble(("Unexpected error during inspection:", "bold red"), f" {exc}")
+        )
+        sys.exit(2)
+    attack_result: AttackValidationResult | None = None
+    if test_attack:
+        if report.vulnerable_to_tokenbreak:
+            try:
+                attack_result = validate_attack(
+                    source,
+                    threshold=threshold,
+                    download=download,
+                    trust_remote_code=trust_remote_code,
+                )
+            except Exception as exc:
+                # Best effort: the static report is still printed below.
+                console.print(
+                    Text.assemble(("Warning:", "bold yellow"), f" Attack validation failed: {exc}")
+                )
+        else:
+            # Report the algorithm that was actually detected instead of
+            # assuming the non-vulnerable result always means Unigram.
+            console.print(
+                "[bold yellow]Skipping attack test:[/bold yellow] "
+                "Model is not flagged as vulnerable "
+                f"({report.tokenizer_algorithm.value} tokenizer detected)."
+            )
+    if output_format == "json":
+        _print_json(report, attack_result)
+    else:
+        _print_table(report, attack_result)
+    # Exit codes for CI pipelines
+    sys.exit(1 if report.risk_level == RiskLevel.HIGH else 0)
+# Run the click command when this module is executed as a script.
+if __name__ == "__main__":
+    main()

tokenbreak_scanner/inspector.py ADDED Viewed

@@ -0,0 +1,294 @@
+"""Model file introspection engine.
+Scans downloaded model artifacts (config.json, tokenizer.json, tokenizer_config.json)
+to determine tokenizer type, model family, and TokenBreak vulnerability.
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Any
+from transformers import AutoTokenizer
+from transformers.utils import cached_file
+from .models import DetectionSource, RiskLevel, ScannerReport, TokenizerAlgorithm
+from .tokenizers import (
+    detect_from_remote_source,
+    detect_from_runtime_tokenizer,
+    detect_from_source_code,
+    detect_tokenizer_from_config,
+    detect_tokenizer_from_json,
+    get_model_family,
+    get_recommendation,
+    is_vulnerable,
+)
+logger = logging.getLogger(__name__)
+# Files we expect to find in a HuggingFace model directory
+CONFIG_FILENAME = "config.json"
+TOKENIZER_CONFIG_FILENAME = "tokenizer_config.json"
+TOKENIZER_JSON_FILENAME = "tokenizer.json"
+# Weights for each detection signal (must each be ≤ 1.0; total can exceed 1.0
+# because we use a cap-and-normalise strategy).
+# The keys are the exact signal names that ``inspect_model`` looks up with
+# ``SIGNAL_WEIGHTS[...]`` and stores in ``DetectionSource.signal``, so a key
+# renamed here must be renamed at its lookup site as well.
+SIGNAL_WEIGHTS: dict[str, float] = {
+    "tokenizer.json model.type": 0.40,
+    "runtime._tokenizer.model": 0.40,
+    "source_code_fingerprint": 0.30,
+    "remote_source_file": 0.30,
+    "tokenizer_config.json class": 0.20,
+    "config.json model_type": 0.15,
+}
+def _load_json(path: Path | str) -> dict[str, Any] | None:
+    """Safely load a JSON object from ``path``, returning None on any error.
+    Parameters
+    ----------
+    path
+        Location of the JSON file to read (decoded as UTF-8).
+    Returns
+    -------
+    dict or None
+        The parsed object, or None when the file cannot be read, is not valid
+        UTF-8, is not valid JSON, or its top-level value is not a JSON object.
+    """
+    try:
+        with open(path, encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return None
+    # Callers use ``.get`` on the result, so anything but an object is unusable.
+    return data if isinstance(data, dict) else None
+def _resolve_model_path(source: str, *, download: bool = False) -> Path:
+    """Resolve a model identifier to a local path.
+    * If `source` is an existing local directory, return it directly.
+    * If `source` is not a local directory and `download` is True, attempt to
+      download/cache the tokenizer files via `transformers` and return the
+      directory holding the first file that could be resolved.
+    * Otherwise raise FileNotFoundError.
+    Parameters
+    ----------
+    source
+        Local directory path or HuggingFace model ID.
+    download
+        Whether fetching the metadata files from HuggingFace is allowed.
+    Raises
+    ------
+    FileNotFoundError
+        If `source` is not a local directory and downloading is disabled,
+        fails, or resolves none of the expected metadata files.
+    """
+    local = Path(source)
+    if local.is_dir():
+        return local.resolve()
+    if not download:
+        raise FileNotFoundError(
+            f"Model path not found: '{source}'. "
+            "Provide a valid local directory or use --download to fetch from HuggingFace."
+        )
+    logger.info("Downloading tokenizer files for '%s' from HuggingFace...", source)
+    try:
+        # Request all three files so each one ends up in the local cache, then
+        # use the first that resolved (config, tokenizer config, tokenizer.json).
+        resolved = [
+            cached_file(source, filename, _raise_exceptions_for_missing_entries=False)
+            for filename in (CONFIG_FILENAME, TOKENIZER_CONFIG_FILENAME, TOKENIZER_JSON_FILENAME)
+        ]
+        found = next((path for path in resolved if path), None)
+        if found:
+            return Path(found).parent.resolve()
+    except Exception as exc:
+        raise FileNotFoundError(
+            f"Could not download or cache model '{source}' from HuggingFace."
+        ) from exc
+    # The download was attempted but produced nothing; say so instead of
+    # advising the user to pass --download again.
+    raise FileNotFoundError(
+        f"None of {CONFIG_FILENAME}, {TOKENIZER_CONFIG_FILENAME} or "
+        f"{TOKENIZER_JSON_FILENAME} could be resolved for '{source}' on HuggingFace."
+    )
+def _evidence(signal: str, value: Any, algo: TokenizerAlgorithm, reason: str) -> DetectionSource:
+    """Build the DetectionSource for ``signal`` using its SIGNAL_WEIGHTS entry."""
+    return DetectionSource(
+        signal=signal,
+        value=value,
+        inferred=algo.value,
+        weight=SIGNAL_WEIGHTS[signal],
+        reason=reason,
+    )
+def inspect_model(
+    source: str,
+    *,
+    download: bool = False,
+    trust_remote_code: bool = False,
+) -> ScannerReport:
+    """Inspect a model directory or HuggingFace model ID and return a vulnerability report.
+    Up to six detection signals are collected in a fixed order, each recorded
+    as a ``DetectionSource``; the weighted vote over them selects the tokenizer
+    algorithm from which vulnerability and risk level are derived.
+    Parameters
+    ----------
+    source
+        Local model directory path or HuggingFace model ID (e.g. ``distilbert-base-uncased``).
+    download
+        If True and ``source`` is a HuggingFace model ID, download tokenizer files.
+    trust_remote_code
+        Passed through to ``transformers.AutoTokenizer`` when probing vocab size.
+    Returns
+    -------
+    ScannerReport
+        The assembled report, including every detection source that fired.
+    Raises
+    ------
+    FileNotFoundError
+        Propagated from ``_resolve_model_path`` when ``source`` cannot be resolved.
+    """
+    model_path = _resolve_model_path(source, download=download)
+    # Load available metadata files
+    config = _load_json(model_path / CONFIG_FILENAME) or {}
+    tokenizer_config = _load_json(model_path / TOKENIZER_CONFIG_FILENAME) or {}
+    tokenizer_json = _load_json(model_path / TOKENIZER_JSON_FILENAME)
+    # Extract model type
+    model_type = config.get("model_type", "")
+    model_family = get_model_family(model_type)
+    # ── Detection: collect signals, then aggregate ──
+    sources: list[DetectionSource] = []
+    # Signal 1: tokenizer.json "model.type" — most reliable
+    if tokenizer_json is not None:
+        algo = detect_tokenizer_from_json(tokenizer_json)
+        if algo is not None:
+            # "model" may be absent, null or a non-object; only dereference a dict.
+            model_section = tokenizer_json.get("model")
+            raw_type = model_section.get("type") if isinstance(model_section, dict) else None
+            sources.append(
+                _evidence(
+                    "tokenizer.json model.type",
+                    str(raw_type or tokenizer_json.get("type")),
+                    algo,
+                    "Direct algorithm type from tokenizers library metadata",
+                )
+            )
+            logger.debug("Tokenizer algorithm detected from tokenizer.json: %s", algo)
+    # Signal 2: Attempt to load AutoTokenizer and inspect Rust backend
+    loaded_tokenizer: Any | None = None
+    vocab_size: int | None = None
+    tok_cls_name: str = "unknown"
+    try:
+        loaded_tokenizer = AutoTokenizer.from_pretrained(
+            str(model_path),
+            trust_remote_code=trust_remote_code,
+            local_files_only=True,
+        )
+        vocab_size = len(loaded_tokenizer)
+        tok_cls_name = loaded_tokenizer.__class__.__name__
+    except Exception as exc:
+        # Best effort: the file-based signals still work without a tokenizer.
+        logger.warning("Could not load tokenizer: %s", exc)
+    if loaded_tokenizer is not None:
+        algo, reason = detect_from_runtime_tokenizer(loaded_tokenizer)
+        if algo is not None:
+            sources.append(
+                _evidence(
+                    "runtime._tokenizer.model",
+                    reason,
+                    algo,
+                    "Rust fast-tokenizer backend model type",
+                )
+            )
+        # Signal 3: source-code fingerprint (if inspect.getsource succeeds)
+        algo_src, reason_src = detect_from_source_code(loaded_tokenizer)
+        if algo_src is not None:
+            sources.append(
+                _evidence(
+                    "source_code_fingerprint",
+                    reason_src,
+                    algo_src,
+                    "Keyword fingerprinting on tokenizer class source",
+                )
+            )
+    # Signal 4: tokenizer_config.json → tokenizer_class / model_type
+    algo_cfg = detect_tokenizer_from_config(tokenizer_config)
+    if algo_cfg is not None:
+        sources.append(
+            _evidence(
+                "tokenizer_config.json class",
+                tokenizer_config.get("tokenizer_class", tokenizer_config.get("model_type", "")),
+                algo_cfg,
+                "Tokenizer class name or model_type from tokenizer_config.json",
+            )
+        )
+    # Signal 5: config.json model_type fallback
+    if model_type:
+        from .tokenizers import MODEL_TYPE_MAP
+        algo_meta = MODEL_TYPE_MAP.get(model_type)
+        if algo_meta is not None:
+            sources.append(
+                _evidence(
+                    "config.json model_type",
+                    model_type,
+                    algo_meta,
+                    "Architecture model_type from config.json",
+                )
+            )
+    # Signal 6: remote source file for trust_remote_code models
+    algo_remote, reason_remote = detect_from_remote_source(model_path, trust_remote_code=trust_remote_code)
+    if algo_remote is not None:
+        sources.append(
+            _evidence(
+                "remote_source_file",
+                reason_remote,
+                algo_remote,
+                "Tokenization Python module downloaded from HF Hub",
+            )
+        )
+    # ── Aggregate weighted votes ──
+    algorithm = _aggregate_signals(sources)
+    confidence_score = _confidence_from_sources(sources)
+    # Risk assessment
+    vulnerable = is_vulnerable(algorithm)
+    risk_level = RiskLevel.HIGH if vulnerable else RiskLevel.LOW
+    if algorithm == TokenizerAlgorithm.UNKNOWN:
+        risk_level = RiskLevel.UNKNOWN
+    recommendation = get_recommendation(algorithm)
+    # Path(".").name is "", so fall back to the resolved directory name (and
+    # finally to the raw argument) to avoid reporting an empty model name.
+    local_source = Path(source)
+    if local_source.exists():
+        model_name = local_source.name or local_source.resolve().name or source
+    else:
+        model_name = source
+    return ScannerReport(
+        model_name=model_name,
+        model_type=model_type or "unknown",
+        model_family=model_family,
+        tokenizer_class=tok_cls_name,
+        tokenizer_algorithm=algorithm,
+        vocab_size=vocab_size,
+        vulnerable_to_tokenbreak=vulnerable,
+        risk_level=risk_level,
+        confidence_score=round(confidence_score, 3),
+        detection_sources=sources,
+        recommendation=recommendation,
+        source=str(model_path),
+        config_metadata={
+            "config.json": config,
+            "tokenizer_config.json": tokenizer_config,
+            "detection_confidence": confidence_score,
+        },
+        tokenizer_metadata=tokenizer_json or {},
+    )
+def _aggregate_signals(sources: list[DetectionSource]) -> TokenizerAlgorithm:
+    """Pick the algorithm backed by the largest summed signal weight.
+    Every source adds its ``weight`` to the tally of the algorithm named in
+    its ``inferred`` field.  Sources with an empty ``inferred`` value, or one
+    that is not a :class:`TokenizerAlgorithm` value, are ignored.  With no
+    usable votes the result is :attr:`TokenizerAlgorithm.UNKNOWN`.
+    """
+    tally: dict[TokenizerAlgorithm, float] = {}
+    for evidence in sources:
+        label = evidence.inferred
+        if not label:
+            continue
+        try:
+            candidate = TokenizerAlgorithm(label)
+        except ValueError:
+            continue
+        tally[candidate] = tally.get(candidate, 0.0) + evidence.weight
+    if tally:
+        # On equal totals ``max`` keeps the algorithm that was voted for first.
+        return max(tally, key=tally.get)
+    return TokenizerAlgorithm.UNKNOWN
+def _confidence_from_sources(sources: list[DetectionSource]) -> float:
+    """Cap-and-normalise confidence from the evidence backing the winning vote.
+    Weights are summed per inferred algorithm and the largest total is clamped
+    to ``[0, 1]``.  Signals that point at a different algorithm therefore do
+    not raise the confidence of the reported one, and adding more agreeing
+    signals cannot push confidence past certainty.
+    Parameters
+    ----------
+    sources
+        Detection signals; entries without an ``inferred`` value are ignored.
+    Returns
+    -------
+    float
+        Confidence in ``[0.0, 1.0]``; ``0.0`` when no signal inferred anything.
+    """
+    support: dict[str, float] = {}
+    for src in sources:
+        if src.inferred:
+            support[src.inferred] = support.get(src.inferred, 0.0) + src.weight
+    if not support:
+        return 0.0
+    # Tied algorithms share the same total, so the value does not depend on
+    # which of them the vote ends up selecting.
+    return max(0.0, min(max(support.values()), 1.0))

tokenbreak_scanner/models.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Pydantic data models for scanner reports."""
+from __future__ import annotations
+from enum import Enum
+from typing import Any
+from pydantic import BaseModel, Field
+class TokenizerAlgorithm(str, Enum):
+    """Known tokenizer algorithms relevant to TokenBreak.
+    Members are ``str`` subclasses.  Their values are the strings the
+    inspector stores in ``DetectionSource.inferred`` and later parses back
+    with ``TokenizerAlgorithm(value)`` when tallying votes.
+    """
+    BPE = "BPE"
+    WORDPIECE = "WordPiece"
+    UNIGRAM = "Unigram"
+    SENTENCEPIECE = "SentencePiece"
+    # Returned by the vote aggregation when no signal cast a usable vote.
+    UNKNOWN = "Unknown"
+class RiskLevel(str, Enum):
+    """Risk assessment levels.
+    Members are ``str`` subclasses; the values are the labels shown in the
+    "Risk Level" row of the CLI table.
+    """
+    LOW = "Low"
+    # The only level for which the CLI exits with status 1.
+    HIGH = "High"
+    # Assigned by the inspector when the tokenizer algorithm is UNKNOWN.
+    UNKNOWN = "Unknown"
+class DetectionSource(BaseModel):
+    """A single piece of evidence that contributed to the algorithm detection.
+    One instance is recorded per detection signal that produced a result; the
+    list of instances forms the "evidence tree" rendered by the CLI.
+    """
+    # Signal identifier; the inspector uses the same strings as SIGNAL_WEIGHTS keys.
+    signal: str = Field(description="Name of the detection signal")
+    value: str | None = Field(default=None, description="Raw value returned by the signal")
+    # Expected to hold a TokenizerAlgorithm value; other strings are skipped by the vote.
+    inferred: str | None = Field(default=None, description="Algorithm inferred from this signal")
+    # Validated to lie in [0.0, 1.0].
+    weight: float = Field(default=0.0, ge=0.0, le=1.0, description="Confidence weight of this signal")
+    reason: str = Field(default="", description="Human-readable explanation")
+class ScannerReport(BaseModel):
+    """Complete report for a scanned model.
+    Built by the inspector and rendered by the CLI either as a Rich table or,
+    via ``model_dump(mode="json")``, as JSON.
+    """
+    model_name: str = Field(description="Name or identifier of the model")
+    model_type: str = Field(description="Model architecture type (e.g., roberta, bert)")
+    model_family: str = Field(description="High-level model family (e.g., RoBERTa, BERT)")
+    tokenizer_class: str = Field(description="Tokenizer class name (e.g., RobertaTokenizerFast)")
+    tokenizer_algorithm: TokenizerAlgorithm = Field(description="Detected tokenizer algorithm")
+    vocab_size: int | None = Field(default=None, description="Tokenizer vocabulary size")
+    vulnerable_to_tokenbreak: bool = Field(description="Whether model is vulnerable to TokenBreak")
+    risk_level: RiskLevel = Field(description="Risk level assessment")
+    # NOTE(review): the validation bounds are slightly wider than the
+    # documented 0.0–1.0 range — presumably slack for rounding; confirm.
+    confidence_score: float = Field(
+        default=0.0, ge=-0.01, le=1.01,
+        description="Aggregated confidence score (0.0–1.0) for the detection",
+    )
+    detection_sources: list[DetectionSource] = Field(
+        default_factory=list,
+        description="Evidence tree showing why the algorithm was detected",
+    )
+    recommendation: str = Field(description="Remediation recommendation")
+    # The inspector stores the resolved local model path here.
+    source: str = Field(description="Source of scan: local path or HuggingFace ID")
+    config_metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Raw metadata from config.json and tokenizer_config.json",
+    )
+    tokenizer_metadata: dict[str, Any] = Field(
+        default_factory=dict,
+        description="Raw metadata from tokenizer.json",
+    )