tico-0.1.0.dev250902-py3-none-any.whl → tico-0.1.0.dev250903-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
  ]
 
  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
- __version__ = "0.1.0.dev250902"
+ __version__ = "0.1.0.dev250903"
 
  MINIMUM_SUPPORTED_VERSION = "2.5.0"
  SECURE_TORCH_VERSION = "2.6.0"

tico/experimental/quantization/ptq/__init__.py CHANGED
@@ -1,13 +1 @@
- """
- Public PTQ API — re-export the most common symbols.
- """
-
- from tico.experimental.quantization.ptq.dtypes import DType
- from tico.experimental.quantization.ptq.mode import Mode
- from tico.experimental.quantization.ptq.qscheme import QScheme
-
- __all__ = [
-     "DType",
-     "Mode",
-     "QScheme",
- ]
+ # DO NOT REMOVE THIS FILE
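
Note: this release empties the convenience re-exports in the ptq package's __init__.py (and, further below, in ptq/observers, ptq/utils, and the wrapper subpackages), so symbols must now be imported from the modules that define them. A minimal migration sketch, based only on the re-exports removed above:

    # Before (0.1.0.dev250902): package-level re-exports
    # from tico.experimental.quantization.ptq import DType, Mode, QScheme

    # After (0.1.0.dev250903): import from the defining submodules
    from tico.experimental.quantization.ptq.dtypes import DType
    from tico.experimental.quantization.ptq.mode import Mode
    from tico.experimental.quantization.ptq.qscheme import QScheme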

tico/experimental/quantization/ptq/examples/compare_ppl.py CHANGED
@@ -26,7 +26,7 @@ from datasets import load_dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
  from tico.experimental.quantization.ptq.quant_config import QuantConfig
- from tico.experimental.quantization.ptq.utils import perplexity
+ from tico.experimental.quantization.ptq.utils.metrics import perplexity
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
 
  # -------------------------------------------------------------------------

tico/experimental/quantization/ptq/examples/debug_quant_outputs.py ADDED
@@ -0,0 +1,129 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+ import tqdm
+ from datasets import load_dataset
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ from tico.experimental.quantization.ptq.quant_config import QuantConfig
+ from tico.experimental.quantization.ptq.utils.introspection import (
+     build_fqn_map,
+     compare_layer_outputs,
+     save_fp_outputs,
+ )
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+
+ # ============================================================================
+ # LAYER-WISE DIFF DEBUGGING PIPELINE
+ # ----------------------------------------------------------------------------
+ # A quantization debugging pipeline that identifies accuracy regressions
+ # by comparing UINT vs FP outputs at each layer.
+ #
+ # 1. Load a full-precision (FP) LLaMA-3-1B model.
+ # 2. Wrap each Transformer block with PTQWrapper (activations → fake-quant).
+ # 3. Capture reference FP layer outputs before quantization.
+ # 4. Calibrate UINT-8 activation observers in a single pass.
+ # 5. Freeze quantization parameters (scale, zero-point).
+ # 6. Re-run inference and compare UINT-8 vs FP outputs per layer.
+ # 7. Report where quantization hurts the most.
+ #
+ # Use this pipeline to trace precision loss layer by layer, and pinpoint
+ # problematic modules during post-training quantization.
+ # ============================================================================
+
+ # -------------------------------------------------------------------------
+ # 0. Global configuration
+ # -------------------------------------------------------------------------
+ MODEL_NAME = "meta-llama/Meta-Llama-3-1B"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ STRIDE = 512
+
+ # Token-budget presets for activation calibration
+ TOKENS: dict[str, int] = {
+     # Smoke test (<1 min turnaround on CPU/GPU)
+     "debug": 2_000,  # ≈16 × 128-seq batches
+     # Good default for 1-7B models (≲3 % ppl delta)
+     "baseline": 50_000,
+     # Production / 4-bit observer smoothing
+     "production": 200_000,
+ }
+ CALIB_TOKENS = TOKENS["baseline"]
+ print(f"Calibrating with {CALIB_TOKENS:,} tokens.\n")
+
+ # -------------------------------------------------------------------------
+ # 1. Load the FP backbone
+ # -------------------------------------------------------------------------
+ print("Loading FP model …")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE).eval()
+ model.config.use_cache = False  # disable KV-cache → full forward
+ m_to_fqn = build_fqn_map(model)  # map modules → fully-qualified names
+
+ # Use Wikitext-2 train split for calibration.
+ dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
+
+ # -------------------------------------------------------------------------
+ # 2. Wrap every layer with PTQWrapper (UINT-8 activations)
+ # -------------------------------------------------------------------------
+ print("Wrapping layers with PTQWrapper …")
+ qcfg = QuantConfig()  # default: per-tensor UINT8
+
+ new_layers = torch.nn.ModuleList()
+ for idx, fp_layer in enumerate(model.model.layers):
+     layer_cfg = qcfg.child(f"layer{idx}")
+     q_layer = PTQWrapper(
+         fp_layer,
+         qcfg=layer_cfg,
+         fp_name=m_to_fqn.get(fp_layer),
+     )
+     new_layers.append(q_layer)
+
+ model.model.layers = new_layers  # swap in quant wrappers
+
+ # -------------------------------------------------------------------------
+ # 3. Activation calibration plus FP-vs-UINT8 diffing
+ # -------------------------------------------------------------------------
+ print("Calibrating UINT-8 observers …")
+ calib_txt = " ".join(dataset["text"])[:CALIB_TOKENS]
+ ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(DEVICE)
+
+ # (a) Enable CALIB mode on every QuantModuleBase
+ for l in model.model.layers:
+     l.enable_calibration()
+
+ # Save reference FP activations before observers clamp/quantize
+ save_handles, act_cache = save_fp_outputs(model)
+
+ with torch.no_grad():
+     for i in tqdm.trange(0, ids.size(1) - 1, STRIDE, desc="Act-calibration"):
+         inputs = ids[:, i : i + STRIDE]
+         model(inputs)  # observers collect act. ranges
+
+ # Remove save hooks now that FP activations are cached
+ for h in save_handles:
+     h.remove()
+
+ # (b) Freeze (scale, zero-point) after calibration
+ for l in model.model.layers:
+     l.freeze_qparams()
+
+ # (c) Register diff hooks and measure per-layer deltas
+ cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
+ # Use same inputs for comparison.
+ model(inputs)
+
+ assert isinstance(cmp_handles, list)
+ for h in cmp_handles:
+     h.remove()
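
The example above prints one report line per layer. The helper can also return the per-layer metrics programmatically; a minimal sketch, assuming the `collect=True` behaviour documented in introspection.py later in this diff (it returns the hook handles together with a {layer -> {metric -> value}} dict) and assuming the values are scalar per layer:

    cmp_handles, results = compare_layer_outputs(
        model, act_cache, metrics=["diff", "peir"], collect=True
    )
    model(inputs)  # re-run the last calibration slice through the hooked model

    # Rank layers by quantization error, largest first ("diff" is the built-in
    # metric requested above).
    worst = sorted(results.items(), key=lambda kv: float(kv[1]["diff"]), reverse=True)[:5]
    for layer_name, layer_metrics in worst:
        print(layer_name, layer_metrics)

    for h in cmp_handles:
        h.remove()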

tico/experimental/quantization/ptq/examples/quantize_with_gptq.py ADDED
@@ -0,0 +1,165 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # =============================================================================
+ # PTQ + GPTQ HYBRID QUANTIZATION PIPELINE
+ # -----------------------------------------------------------------------------
+ # This script shows how to:
+ # 1. Load a pretrained FP Llama-3 model.
+ # 2. Run GPTQ to quantize weights only.
+ # 3. Wrap every Transformer layer with a PTQWrapper to quantize activations.
+ # 4. Calibrate UINT-8 observers in a single pass over a text corpus.
+ # 5. Inject GPTQ’s per-tensor weight scales / zero-points into the PTQ graph.
+ # 6. Freeze all Q-params and compute Wikitext-2 perplexity.
+ # =============================================================================
+
+ from typing import Any
+
+ import torch
+ import tqdm
+ from datasets import load_dataset
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ from tico.experimental.quantization import convert, prepare
+ from tico.experimental.quantization.config import GPTQConfig
+ from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
+ from tico.experimental.quantization.ptq.quant_config import QuantConfig
+ from tico.experimental.quantization.ptq.utils.introspection import build_fqn_map
+ from tico.experimental.quantization.ptq.utils.metrics import perplexity
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+ from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
+     QuantModuleBase,
+ )
+
+ # -------------------------------------------------------------------------
+ # 0. Global configuration
+ # -------------------------------------------------------------------------
+ MODEL_NAME = "meta-llama/Meta-Llama-3-1B"
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ STRIDE = 512
+
+ # Token-budget presets for activation calibration
+ TOKENS: dict[str, int] = {
+     # Smoke test (<1 min turnaround on CPU/GPU)
+     "debug": 2_000,  # ≈16 × 128-seq batches
+     # Good default for 1-7B models (≲3 % ppl delta)
+     "baseline": 50_000,
+     # Production / 4-bit observer smoothing
+     "production": 200_000,
+ }
+ CALIB_TOKENS = TOKENS["baseline"]
+
+ # -------------------------------------------------------------------------
+ # 1. Helper — copy GPTQ (scale, zp) into PTQ observers
+ # -------------------------------------------------------------------------
+ def inject_gptq_qparams(
+     root: torch.nn.Module,
+     gptq_quantizers: dict[str, Any],  # {fp_name: quantizer}
+     weight_obs_name: str = "weight",
+ ):
+     """
+     For every `QuantModuleBase` whose `fp_name` matches a GPTQ key,
+     locate the observer called `weight_obs_name` and overwrite its
+     (scale, zero-point), then lock them against further updates.
+     """
+     for m in root.modules():
+         if not isinstance(m, QuantModuleBase):
+             continue
+         if m.fp_name is None:
+             continue
+         quantizer = gptq_quantizers.get(m.fp_name)
+         if quantizer is None:
+             continue
+         obs = m.get_observer(weight_obs_name)
+         if obs is None:
+             continue
+         assert isinstance(obs, AffineObserverBase)
+         # GPTQ quantizer attributes
+         obs.load_qparams(quantizer.scale, quantizer.zero, lock=True)
+
+
+ # -------------------------------------------------------------------------
+ # 2. Load the FP backbone
+ # -------------------------------------------------------------------------
+ print("Loading FP model …")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE).eval()
+ model.config.use_cache = False  # disable KV-cache → full forward
+ m_to_fqn = build_fqn_map(model)  # map modules → fully-qualified names
+
+ # -------------------------------------------------------------------------
+ # 3. Run GPTQ (weight-only) pass
+ # -------------------------------------------------------------------------
+ print("Applying GPTQ …")
+ dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
+ q_m = prepare(model, GPTQConfig(), inplace=True)
+
+ for d in tqdm.tqdm(dataset, desc="GPTQ calibration"):
+     ids = tokenizer(d["text"], return_tensors="pt").input_ids.to(DEVICE)
+     q_m(ids)  # observers gather weight stats
+
+ q_m = convert(q_m, inplace=True)  # materialize INT-weight tensors
+
+ # -------------------------------------------------------------------------
+ # 4. Wrap every layer with PTQWrapper (activation UINT-8)
+ # -------------------------------------------------------------------------
+ qcfg = QuantConfig()  # default: per-tensor UINT8
+ new_layers = torch.nn.ModuleList()
+
+ for idx, fp_layer in enumerate(q_m.model.layers):
+     layer_cfg = qcfg.child(f"layer{idx}")
+     q_layer = PTQWrapper(
+         fp_layer,
+         qcfg=layer_cfg,
+         fp_name=m_to_fqn.get(fp_layer),
+     )
+     new_layers.append(q_layer)
+
+ q_m.model.layers = new_layers
+
+ # -------------------------------------------------------------------------
+ # 5. Single-pass activation calibration
+ # -------------------------------------------------------------------------
+ print("Calibrating UINT-8 observers …")
+ calib_txt = " ".join(
+     load_dataset("wikitext", "wikitext-2-raw-v1", split="train")["text"]
+ )[:CALIB_TOKENS]
+ ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(DEVICE)
+
+ # (a) Enable CALIB mode on every QuantModuleBase
+ for l in q_m.model.layers:
+     l.enable_calibration()
+
+ # (b) Overwrite weight observers with GPTQ statistics
+ inject_gptq_qparams(q_m, q_m.quantizers)
+
+ with torch.no_grad():
+     for i in tqdm.trange(0, ids.size(1) - 1, STRIDE, desc="Act-calibration"):
+         q_m(ids[:, i : i + STRIDE])  # observers collect act. ranges
+
+ # (c) Freeze all Q-params (scale, zp)
+ for l in q_m.model.layers:
+     l.freeze_qparams()
+
+ # -------------------------------------------------------------------------
+ # 6. Evaluate perplexity on Wikitext-2
+ # -------------------------------------------------------------------------
+ print("\nCalculating perplexities …")
+ test_ds = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
+ enc = tokenizer("\n\n".join(test_ds["text"]), return_tensors="pt")
+ ppl_uint8 = perplexity(q_m, enc, DEVICE, stride=STRIDE)
+
+ print("\n┌── Wikitext-2 test perplexity ─────────────")
+ print(f"│ UINT-8 : {ppl_uint8:8.2f}")
+ print("└───────────────────────────────────────────")

tico/experimental/quantization/ptq/observers/__init__.py CHANGED
@@ -1,15 +1 @@
- from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
- from tico.experimental.quantization.ptq.observers.ema import EMAObserver
- from tico.experimental.quantization.ptq.observers.identity import IdentityObserver
- from tico.experimental.quantization.ptq.observers.minmax import MinMaxObserver
- from tico.experimental.quantization.ptq.observers.mx import MXObserver
-
- __all__ = [
-     "AffineObserverBase",
-     "ObserverBase",
-     "EMAObserver",
-     "IdentityObserver",
-     "MinMaxObserver",
-     "MXObserver",
- ]
+ # DO NOT REMOVE THIS FILE

tico/experimental/quantization/ptq/observers/ema.py CHANGED
@@ -15,7 +15,7 @@
  import torch
 
  from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
- from tico.experimental.quantization.ptq.utils import channelwise_minmax
+ from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
 
 
  class EMAObserver(AffineObserverBase):

tico/experimental/quantization/ptq/observers/minmax.py CHANGED
@@ -15,7 +15,7 @@
  import torch
 
  from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
- from tico.experimental.quantization.ptq.utils import channelwise_minmax
+ from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
 
 
  class MinMaxObserver(AffineObserverBase):

tico/experimental/quantization/ptq/utils/__init__.py CHANGED
@@ -1,7 +1 @@
- from tico.experimental.quantization.ptq.utils.metrics import perplexity
- from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
-
- __all__ = [
-     "channelwise_minmax",
-     "perplexity",
- ]
+ # DO NOT REMOVE THIS FILE

tico/experimental/quantization/ptq/utils/introspection.py ADDED
@@ -0,0 +1,169 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Callable, Dict, List, Optional, Tuple
+
+ import torch
+
+ from tico.experimental.quantization.evaluation.metric import MetricCalculator
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+ from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
+     QuantModuleBase,
+ )
+
+
+ def build_fqn_map(root: torch.nn.Module) -> dict[torch.nn.Module, str]:
+     """
+     Return {module_object: full_qualified_name} without touching the modules.
+     """
+     return {m: n for n, m in root.named_modules()}
+
+
+ def save_fp_outputs(
+     model: torch.nn.Module,
+ ) -> Tuple[List[torch.utils.hooks.RemovableHandle], Dict[str, torch.Tensor]]:
+     """
+     Register forward-hooks on every `QuantModuleBase` wrapper itself (not the
+     wrapped `module`) and cache its output while the wrapper runs in CALIB mode.
+
+     Parameters
+     ----------
+     model : torch.nn.Module
+         The model whose wrappers are already switched to CALIB mode
+         (`enable_calibration()` has been called).
+
+     Returns
+     -------
+     handles : list[RemovableHandle]
+         Hook handles; call `.remove()` on each one to detach the hooks.
+     cache : dict[str, torch.Tensor]
+         Mapping "wrapper-name → cached FP32 activation" captured from the first
+         forward pass. Keys default to `wrapper.fp_name`; if that attribute is
+         `None`, the `id(wrapper)` string is used instead.
+     """
+     cache: Dict[str, torch.Tensor] = {}
+     handles: List[torch.utils.hooks.RemovableHandle] = []
+
+     def _save(name: str):
+         def hook(_, __, out: torch.Tensor | Tuple):
+             if isinstance(out, tuple):
+                 out = out[0]
+             assert isinstance(out, torch.Tensor)
+             cache[name] = out.detach()
+
+         return hook
+
+     for m in model.modules():
+         if isinstance(m, QuantModuleBase):
+             name = m.fp_name or str(id(m))
+             handles.append(m.register_forward_hook(_save(name)))
+
+     return handles, cache
+
+
+ def compare_layer_outputs(
+     model: torch.nn.Module,
+     cache: Dict[str, torch.Tensor],
+     *,
+     metrics: Optional[List[str]] = None,
+     custom_metrics: Optional[Dict[str, Callable]] = None,
+     rtol: float = 1e-3,
+     atol: float = 1e-3,
+     collect: bool = False,
+ ):
+     """
+     Register forward-hooks on every `QuantModuleBase` wrapper to compare its
+     QUANT-mode output to the FP32 reference saved by `save_fp_outputs()`.
+
+     Each hook prints a per-layer diff report:
+
+         ✓ layer_name max=1.23e-02 mean=8.45e-04 (within tolerance)
+         ⚠️ layer_name max=3.07e+00 mean=5.12e-01 (exceeds tolerance)
+
+     Parameters
+     ----------
+     model : torch.nn.Module
+         The model whose wrappers are now in QUANT mode
+         (`freeze_qparams()` has been called).
+     cache : dict[str, torch.Tensor]
+         The reference activations captured during CALIB mode.
+     metrics
+         Metrics to compute. Defaults to `["diff"]`. Add `peir` to print PEIR.
+     custom_metrics
+         Optional user metric functions. Same signature as built-ins.
+     rtol, atol : float, optional
+         Relative / absolute tolerances used to flag large deviations
+         (similar to `torch.allclose` semantics).
+     collect : bool, optional
+         • False (default) → print one-line report per layer, return `None`
+         • True → suppress printing, return a nested dict
+           {layer_name -> {metric -> value}}
+
+     Returns
+     -------
+     handles
+         Hook handles; call `.remove()` once diffing is complete.
+     results
+         Only if *collect* is True.
+     """
+     metrics = metrics or ["diff"]
+     calc = MetricCalculator(custom_metrics)
+     handles: List[torch.utils.hooks.RemovableHandle] = []
+     results: Dict[
+         str, Dict[str, float]
+     ] = {}  # Dict[layer_name, Dict[metric_name, value]]
+
+     def _cmp(name: str):
+         ref = cache.get(name)
+
+         def hook(_, __, out):
+             if ref is None:
+                 if not collect:
+                     print(f"[{name}] no cached reference")
+                 return
+             if isinstance(out, tuple):
+                 out = out[0]
+             assert isinstance(out, torch.Tensor)
+
+             # Compute all requested metrics
+             res = calc.compute([ref], [out], metrics)  # lists with length-1 tensors
+             res = {k: v[0] for k, v in res.items()}  # flatten
+
+             if collect:
+                 results[name] = res  # type: ignore[assignment]
+                 return
+
+             # Pretty print ------------------------------------------------ #
+             diff_val = res.get("diff") or res.get("max_abs_diff")
+             thresh = atol + rtol * ref.abs().max().item()
+             flag = "⚠️" if (diff_val is not None and diff_val > thresh) else "✓"  # type: ignore[operator]
+
+             pieces = [f"{flag} {name:45s}"]
+             for key, val in res.items():
+                 pieces.append(f"{key}={val:<7.4}")
+             print(" ".join(pieces))
+
+         return hook
+
+     for m in model.modules():
+         if isinstance(m, PTQWrapper):
+             # skip the internal fp module inside the wrapper
+             continue
+         if isinstance(m, QuantModuleBase):
+             lname = m.fp_name or str(id(m))
+             handles.append(m.register_forward_hook(_cmp(lname)))
+
+     if collect:
+         return handles, results
+     return handles
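
A compact sketch of how the two hook helpers pair up, assuming a model whose decoder layers are already wrapped with PTQWrapper as in the examples above; `sample_ids` is a placeholder for any calibration batch:

    # 1) CALIB mode: observers collect ranges while FP outputs are cached
    for layer in model.model.layers:
        layer.enable_calibration()
    handles, cache = save_fp_outputs(model)
    model(sample_ids)
    for h in handles:
        h.remove()

    # 2) QUANT mode: freeze qparams, then diff each wrapper against the cache
    for layer in model.model.layers:
        layer.freeze_qparams()
    handles = compare_layer_outputs(model, cache, metrics=["diff"])
    model(sample_ids)  # prints one ✓ / ⚠️ line per wrapped module
    for h in handles:
        h.remove()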

tico/experimental/quantization/ptq/wrappers/__init__.py CHANGED
@@ -0,0 +1 @@
+ # DO NOT REMOVE THIS FILE

tico/experimental/quantization/ptq/wrappers/llama/__init__.py CHANGED
@@ -1,9 +1 @@
- from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
-     QuantLlamaAttention,
- )
- from tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer import (
-     QuantLlamaDecoderLayer,
- )
- from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
-
- __all__ = ["QuantLlamaAttention", "QuantLlamaDecoderLayer", "QuantLlamaMLP"]
+ # DO NOT REMOVE THIS FILE

tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py CHANGED
@@ -25,7 +25,10 @@ from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
  from tico.experimental.quantization.ptq.wrappers.registry import try_register
 
 
- @try_register("transformers.models.llama.modeling_llama.LlamaAttention")
+ @try_register(
+     "transformers.models.llama.modeling_llama.LlamaAttention",
+     "transformers.models.llama.modeling_llama.LlamaSdpaAttention",
+ )
  class QuantLlamaAttention(QuantModuleBase):
      def __init__(
          self,

tico/experimental/quantization/ptq/wrappers/nn/__init__.py CHANGED
@@ -1,11 +1 @@
- from tico.experimental.quantization.ptq.wrappers.nn.quant_layernorm import (
-     QuantLayerNorm,
- )
- from tico.experimental.quantization.ptq.wrappers.nn.quant_linear import QuantLinear
- from tico.experimental.quantization.ptq.wrappers.nn.quant_silu import QuantSiLU
-
- __all__ = [
-     "QuantLayerNorm",
-     "QuantLinear",
-     "QuantSiLU",
- ]
+ # DO NOT REMOVE THIS FILE

tico/experimental/quantization/ptq/wrappers/registry.py CHANGED
@@ -90,7 +90,9 @@ def register(
 
 
  # ───────────────────────────── conditional decorator
- def try_register(path: str) -> Callable[[Type[QuantModuleBase]], Type[QuantModuleBase]]:
+ def try_register(
+     *paths: str,
+ ) -> Callable[[Type[QuantModuleBase]], Type[QuantModuleBase]]:
      """
      @try_register("transformers.models.llama.modeling_llama.LlamaMLP")
 
@@ -99,14 +101,15 @@ def try_register(path: str) -> Callable[[Type[QuantModuleBase]], Type[QuantModul
      """
 
      def _decorator(quant_cls: Type[QuantModuleBase]):
-         module_name, _, cls_name = path.rpartition(".")
-         try:
-             mod = importlib.import_module(module_name)
-             fp_cls = getattr(mod, cls_name)
-             _WRAPPERS[fp_cls] = quant_cls
-         except (ModuleNotFoundError, AttributeError):
-             # transformers not installed or class renamed – silently skip
-             pass
+         for path in paths:
+             module_name, _, cls_name = path.rpartition(".")
+             try:
+                 mod = importlib.import_module(module_name)
+                 fp_cls = getattr(mod, cls_name)
+                 _WRAPPERS[fp_cls] = quant_cls
+             except (ModuleNotFoundError, AttributeError):
+                 # optional dep missing or class renamed – skip silently
+                 pass
          return quant_cls
 
      return _decorator
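
With the variadic signature, one quant wrapper can be registered for several FP classes at once, and paths that fail to import are skipped silently, so registration stays safe across transformers versions. The call shape, abridged from the quant_attn.py hunk above (the class body is elided here; the full implementation lives in that file):

    from tico.experimental.quantization.ptq.wrappers.quant_module_base import QuantModuleBase
    from tico.experimental.quantization.ptq.wrappers.registry import try_register

    @try_register(
        "transformers.models.llama.modeling_llama.LlamaAttention",
        "transformers.models.llama.modeling_llama.LlamaSdpaAttention",
    )
    class QuantLlamaAttention(QuantModuleBase):
        ...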

tico-0.1.0.dev250902.dist-info/METADATA → tico-0.1.0.dev250903.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tico
- Version: 0.1.0.dev250902
+ Version: 0.1.0.dev250903
  Summary: Convert exported Torch module to circle
  Home-page: UNKNOWN
  License: UNKNOWN

tico-0.1.0.dev250902.dist-info/RECORD → tico-0.1.0.dev250903.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
- tico/__init__.py,sha256=PXZzhb0ZexNIwGhVJpg4Ln_RqskbSIMigqj0GdZgbeA,1883
+ tico/__init__.py,sha256=Y253CsOm0_3d-G5glZ1O4TGs9GTjnvCqSpe8pqRIGgE,1883
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -56,37 +56,40 @@ tico/experimental/quantization/passes/propagate_qparam_backward.py,sha256=TGtyW0
  tico/experimental/quantization/passes/propagate_qparam_forward.py,sha256=RhUHGCR2RpBO5KYkQ7Z8U5u7HEwDq2wdKHLKAJCi-5c,5138
  tico/experimental/quantization/passes/quantize_bias.py,sha256=T7YxJ70N0tSK0FF9VJZA5iP0sHdnnsX9GX4AT4JDFSk,4325
  tico/experimental/quantization/passes/remove_weight_dequant_op.py,sha256=gI1MtrHazWpdNfys7f1ngTTWplzluF7SA-uX0HMR5Mc,6592
- tico/experimental/quantization/ptq/__init__.py,sha256=ZoPdEwZ1i1n5pBFChx8GuUrkfRP2vsSoLPNILQjNBaA,298
+ tico/experimental/quantization/ptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/ptq/dtypes.py,sha256=xfCBtq6mQmUYRwsoFgII6gvRl1raQi0Inj9pznDuKwQ,2236
  tico/experimental/quantization/ptq/mode.py,sha256=lT-T8vIv8YWcwrjT7xXVhOw1g7aoAdh_3PWB-ptPKaI,1052
  tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXOq48uIqGy4TLDqGxY,1301
  tico/experimental/quantization/ptq/quant_config.py,sha256=nm7570Y1X2mOT_8s27ilWid04otor6cVTi9GwgAEaKc,4300
  tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
- tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=ODaRB234iy2dFfGIBd-OtKxdSzxnIbgKZkQ_o30tSts,5287
+ tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=SmSmaCBVWTcGRPRk2zopDqESD_gF8D7J4kUNNZ-0cMk,5295
+ tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=astXzx-maq1W4gKvX2QaGmD2Tpmjunv4JqDYVk9eZRQ,5177
  tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
  tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
  tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
  tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=N1qZQgt1S-xZrdv-PW7OfXEcv0gsO2q9faOF4aD-zKo,4147
- tico/experimental/quantization/ptq/observers/__init__.py,sha256=WF2MvL9M_jl-B1FqcY9zic34NOCRp17HkRYv-TMxMr4,613
+ tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=w21Qao5_6SnWMuxmnZbZOoqaLQOuSnK52mHin4aedtA,6979
+ tico/experimental/quantization/ptq/observers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
  tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
- tico/experimental/quantization/ptq/observers/ema.py,sha256=MAMdBmjVNMg_vsqXrcBzbw_1nFJ-j4Gz651k3-VlaMQ,2057
+ tico/experimental/quantization/ptq/observers/ema.py,sha256=oISP1XaD3lapVaHQKscD3rjLcKbhOy4Nvi6dqRFZwF8,2070
  tico/experimental/quantization/ptq/observers/identity.py,sha256=vkec8Or-7VwM4zkFEvEKROQJk8XEHMVX8mBNDnxSyS8,2591
- tico/experimental/quantization/ptq/observers/minmax.py,sha256=mLHkwIzWFzQXev7EU7w1333KckwRjukc3_cUPJOnUfs,1486
+ tico/experimental/quantization/ptq/observers/minmax.py,sha256=WWcAyEIrd5j3k9qsoBJi3nUnWtrwPaKlR9CPezbDSqQ,1499
  tico/experimental/quantization/ptq/observers/mx.py,sha256=aP4qmBgeiRIYZJksShN5gs6UyYOFi2-Sbk5k5xvPQ4w,1863
- tico/experimental/quantization/ptq/utils/__init__.py,sha256=MrQwMbbKS0dJrO8jsceCai4Z59iKQNpTPZND3GN6TrM,216
+ tico/experimental/quantization/ptq/utils/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/experimental/quantization/ptq/utils/introspection.py,sha256=y2oGf7RoApMHJeXLmIz3VVWB9vazGEgyLbxLiVTTQdw,6000
  tico/experimental/quantization/ptq/utils/metrics.py,sha256=EW_FQmJrl9Y4esspZQ0GHfJ58RwuJUz0l8IfYq3NWY4,4461
  tico/experimental/quantization/ptq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
- tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
  tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=LhEoobfvto6zKrBOKL4gmxfFFc31jHzyQV_zfps-iQM,3604
  tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=vkcDos_knGSS29rIZuEIWkAJLHrENbGz8nCH2-iara8,5969
- tico/experimental/quantization/ptq/wrappers/registry.py,sha256=M1D_foC0PR-Ii4G0lbOO3_pmhvHlMF28NolK_q2DZtw,4783
- tico/experimental/quantization/ptq/wrappers/llama/__init__.py,sha256=4xuAYnJcohMTtBzrH4cxq8WKG2GQo8nbhektVg8w7F0,380
- tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py,sha256=WIUI6EFMTvvruvqu8pBxWy6qJeDyjkaYbJk1R3pAmwE,8578
+ tico/experimental/quantization/ptq/wrappers/registry.py,sha256=wauoZdZBR15bGj1Upt9owEfFDT-Tj6HzciG9HDM1BHo,4845
+ tico/experimental/quantization/ptq/wrappers/llama/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py,sha256=-K1COLHIHfJZhQu-RE6KfJIkaL7S6yR4iUj48QkjMTw,8652
  tico/experimental/quantization/ptq/wrappers/llama/quant_decoder_layer.py,sha256=2XsIf5rcabDXXkahqriSxfo2curFq0Y5bnRPcYkJPg8,7187
  tico/experimental/quantization/ptq/wrappers/llama/quant_mlp.py,sha256=uZMnrX66oZwxhKhcNbLXXeri-WxxRBiZnr15aBXJMm0,3562
- tico/experimental/quantization/ptq/wrappers/nn/__init__.py,sha256=I9uTt5HfcRoMEDYHpAeATMv2TbCQiX0ZbfUFMzSJ4Qw,336
+ tico/experimental/quantization/ptq/wrappers/nn/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/ptq/wrappers/nn/quant_layernorm.py,sha256=G5Sgt-tXnzh0Rxyk-2honmZIfEQOZlRfOsoDBdSGmA4,6887
  tico/experimental/quantization/ptq/wrappers/nn/quant_linear.py,sha256=xW-VEPB7RJoslS3xLVCdhIuMjppknvpkZleRGK4JFVQ,2240
  tico/experimental/quantization/ptq/wrappers/nn/quant_silu.py,sha256=XnJDggkWUTfXC1-BLeAbcCUtp687XLIkIIbuQlqycDw,1864
@@ -244,9 +247,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
- tico-0.1.0.dev250902.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
- tico-0.1.0.dev250902.dist-info/METADATA,sha256=CePT5yw5-ln0-Ct8n61iGDnFfnoASlqAfPQmxRQ9QQ0,8450
- tico-0.1.0.dev250902.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- tico-0.1.0.dev250902.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
- tico-0.1.0.dev250902.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
- tico-0.1.0.dev250902.dist-info/RECORD,,
+ tico-0.1.0.dev250903.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+ tico-0.1.0.dev250903.dist-info/METADATA,sha256=3mrCVEvr_rIq-iHE5roTYJn1Pw3_3za-EMoxG-yg3dg,8450
+ tico-0.1.0.dev250903.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ tico-0.1.0.dev250903.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+ tico-0.1.0.dev250903.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+ tico-0.1.0.dev250903.dist-info/RECORD,,