tico 0.1.0.dev250915-py3-none-any.whl → 0.1.0.dev250917-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tico has been flagged as possibly problematic by the registry scanner.
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
  ]

  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
- __version__ = "0.1.0.dev250915"
+ __version__ = "0.1.0.dev250917"

  MINIMUM_SUPPORTED_VERSION = "2.5.0"
  SECURE_TORCH_VERSION = "2.6.0"
tico/experimental/quantization/ptq/examples/compare_ppl.py CHANGED
@@ -77,7 +77,7 @@ def main():
  "--dtype",
  choices=list(DTYPE_MAP.keys()),
  default="float32",
- help="Model dtype for load (float32|bfloat16|float16).",
+ help=f"Model dtype for load.",
  )
  parser.add_argument(
  "--stride", type=int, default=512, help="Sliding-window stride for perplexity."
@@ -126,7 +126,9 @@ def main():
  print(f"DType : {args.dtype}")
  print(f"Stride : {args.stride}")
  print(f"Use HF cache? : {args.use_cache}")
- print(f"Calib preset : {args.calib_preset}")
+ print(
+ f"Calib preset : {args.calib_preset} ({TOKENS[args.calib_preset]:,} tokens)"
+ )
  print()

  # -------------------------------------------------------------------------
tico/experimental/quantization/ptq/examples/debug_quant_outputs.py CHANGED
@@ -12,19 +12,6 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- import torch
- import tqdm
- from datasets import load_dataset
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
- from tico.experimental.quantization.ptq.utils.introspection import (
- build_fqn_map,
- compare_layer_outputs,
- save_fp_outputs,
- )
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
-
  # ============================================================================
  # LAYER-WISE DIFF DEBUGGING PIPELINE
  # ----------------------------------------------------------------------------
@@ -43,12 +30,21 @@ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
  # problematic modules during post-training quantization.
  # ============================================================================

- # -------------------------------------------------------------------------
- # 0. Global configuration
- # -------------------------------------------------------------------------
- MODEL_NAME = "meta-llama/Meta-Llama-3-1B"
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
- STRIDE = 512
+ import argparse
+ import sys
+
+ import torch
+ import tqdm
+ from datasets import load_dataset
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ from tico.experimental.quantization.ptq.quant_config import QuantConfig
+ from tico.experimental.quantization.ptq.utils.introspection import (
+ build_fqn_map,
+ compare_layer_outputs,
+ save_fp_outputs,
+ )
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper

  # Token-budget presets for activation calibration
  TOKENS: dict[str, int] = {
@@ -59,71 +55,185 @@ TOKENS: dict[str, int] = {
  # Production / 4-bit observer smoothing
  "production": 200_000,
  }
- CALIB_TOKENS = TOKENS["baseline"]
- print(f"Calibrating with {CALIB_TOKENS:,} tokens.\n")
-
- # -------------------------------------------------------------------------
- # 1. Load the FP backbone
- # -------------------------------------------------------------------------
- print("Loading FP model …")
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE).eval()
- model.config.use_cache = False # disable KV-cache → full forward
- m_to_fqn = build_fqn_map(model) # map modules → fully-qualified names
-
- # Use Wikitext-2 train split for calibration.
- dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
-
- # -------------------------------------------------------------------------
- # 2. Wrap every layer with PTQWrapper (UINT-8 activations)
- # -------------------------------------------------------------------------
- print("Wrapping layers with PTQWrapper …")
- qcfg = QuantConfig() # default: per-tensor UINT8
-
- new_layers = torch.nn.ModuleList()
- for idx, fp_layer in enumerate(model.model.layers):
- layer_cfg = qcfg.child(f"layer{idx}")
- q_layer = PTQWrapper(
- fp_layer,
- qcfg=layer_cfg,
- fp_name=m_to_fqn.get(fp_layer),
- )
- new_layers.append(q_layer)

- model.model.layers = new_layers # swap in quant wrappers
-
- # -------------------------------------------------------------------------
- # 3. Activation calibration plus FP-vs-UINT8 diffing
- # -------------------------------------------------------------------------
- print("Calibrating UINT-8 observers …")
- calib_txt = " ".join(dataset["text"])[:CALIB_TOKENS]
- ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(DEVICE)
+ DTYPE_MAP = {
+ "float32": torch.float32,
+ "bfloat16": torch.bfloat16,
+ "float16": torch.float16,
+ }

- # (a) Enable CALIB mode on every QuantModuleBase
- for l in model.model.layers:
- l.enable_calibration()
+ # Hardcoded dataset settings
+ DATASET_NAME = "wikitext"
+ DATASET_CONFIG = "wikitext-2-raw-v1"
+ TRAIN_SPLIT = "train"

- # Save reference FP activations before observers clamp/quantize
- save_handles, act_cache = save_fp_outputs(model)

- with torch.no_grad():
- for i in tqdm.trange(0, ids.size(1) - 1, STRIDE, desc="Act-calibration"):
- inputs = ids[:, i : i + STRIDE]
- model(inputs) # observers collect act. ranges
+ def main():
+ parser = argparse.ArgumentParser(
+ description="Layer-wise diff debugging pipeline for PTQ"
+ )
+ parser.add_argument(
+ "--model", type=str, required=True, help="HF repo name or local path."
+ )
+ parser.add_argument(
+ "--device",
+ type=str,
+ default="cuda" if torch.cuda.is_available() else "cpu",
+ help="Device to run on (cuda|cpu|mps).",
+ )
+ parser.add_argument(
+ "--dtype",
+ choices=list(DTYPE_MAP.keys()),
+ default="float32",
+ help=f"Model dtype for load.",
+ )
+ parser.add_argument(
+ "--stride",
+ type=int,
+ default=512,
+ help="Sliding-window stride used during calibration.",
+ )
+ parser.add_argument(
+ "--calib-preset",
+ choices=list(TOKENS.keys()),
+ default="debug",
+ help="Calibration token budget preset.",
+ )
+ parser.add_argument("--seed", type=int, default=42, help="Random seed.")
+ parser.add_argument(
+ "--trust-remote-code",
+ action="store_true",
+ help="Enable only if you trust the model repo code.",
+ )
+ parser.add_argument(
+ "--hf-token",
+ type=str,
+ default=None,
+ help="Optional HF token for gated/private repos.",
+ )
+ parser.add_argument(
+ "--use-cache",
+ dest="use_cache",
+ action="store_true",
+ default=False,
+ help="Use model KV cache if enabled (off by default).",
+ )
+ parser.add_argument(
+ "--no-tqdm", action="store_true", help="Disable tqdm progress bars."
+ )

- # Remove save hooks now that FP activations are cached
- for h in save_handles:
- h.remove()
+ args = parser.parse_args()

- # (b) Freeze (scale, zero-point) after calibration
- for l in model.model.layers:
- l.freeze_qparams()
+ # Basic setup
+ torch.manual_seed(args.seed)
+ device = torch.device(args.device)
+ dtype = DTYPE_MAP[args.dtype] # noqa: E999 (kept readable)

- # (c) Register diff hooks and measure per-layer deltas
- cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
- # Use same inputs for comparison.
- model(inputs)
+ print("=== Config ===")
+ print(f"Model : {args.model}")
+ print(f"Device : {device.type}")
+ print(f"DType : {args.dtype}")
+ print(f"Stride : {args.stride}")
+ print(
+ f"Calib preset : {args.calib_preset} ({TOKENS[args.calib_preset]:,} tokens)"
+ )
+ print(f"Use HF cache? : {args.use_cache}")
+ print()
+
+ # -------------------------------------------------------------------------
+ # 1. Load the FP backbone and tokenizer
+ # -------------------------------------------------------------------------
+ print("Loading FP model …")
+ tokenizer = AutoTokenizer.from_pretrained(
+ args.model,
+ trust_remote_code=args.trust_remote_code,
+ token=args.hf_token,
+ )
+ model = (
+ AutoModelForCausalLM.from_pretrained(
+ args.model,
+ torch_dtype=dtype,
+ trust_remote_code=args.trust_remote_code,
+ token=args.hf_token,
+ )
+ .to(device)
+ .eval()
+ )

- assert isinstance(cmp_handles, list)
- for h in cmp_handles:
- h.remove()
+ # Disable KV cache to force full forward passes for introspection
+ model.config.use_cache = args.use_cache
+
+ # Build module -> FQN map before wrapping
+ m_to_fqn = build_fqn_map(model)
+
+ # Prepare calibration inputs (HF Wikitext-2 train split)
+ CALIB_TOKENS = TOKENS[args.calib_preset]
+ print(f"Calibrating with {CALIB_TOKENS:,} tokens.\n")
+ # Use Wikitext-2 train split for calibration.
+ dataset = load_dataset(DATASET_NAME, DATASET_CONFIG, split=TRAIN_SPLIT)
+
+ # -------------------------------------------------------------------------
+ # 2. Wrap every layer with PTQWrapper (UINT-8 activations)
+ # -------------------------------------------------------------------------
+ print("Wrapping layers with PTQWrapper …")
+ qcfg = QuantConfig() # default: per-tensor UINT8
+
+ new_layers = torch.nn.ModuleList()
+ for idx, fp_layer in enumerate(model.model.layers):
+ layer_cfg = qcfg.child(f"layer{idx}")
+ q_layer = PTQWrapper(
+ fp_layer,
+ qcfg=layer_cfg,
+ fp_name=m_to_fqn.get(fp_layer),
+ )
+ new_layers.append(q_layer)
+
+ model.model.layers = new_layers # swap in quant wrappers
+
+ # -------------------------------------------------------------------------
+ # 3. Activation calibration plus FP-vs-UINT8 diffing
+ # -------------------------------------------------------------------------
+ print("Calibrating UINT-8 observers …")
+ calib_txt = " ".join(dataset["text"])[:CALIB_TOKENS]
+ ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
+
+ # (a) Enable CALIB mode on every QuantModuleBase
+ for l in model.model.layers:
+ l.enable_calibration()
+
+ # Save reference FP activations before observers clamp/quantize
+ save_handles, act_cache = save_fp_outputs(model)
+
+ iterator = range(0, ids.size(1) - 1, args.stride)
+ if not args.no_tqdm:
+ iterator = tqdm.tqdm(iterator, desc="Act-Calibration")
+ with torch.no_grad():
+ for i in iterator:
+ inputs = ids[:, i : i + args.stride]
+ model(inputs) # observers collect act. ranges
+
+ # Remove save hooks now that FP activations are cached
+ for h in save_handles:
+ h.remove()
+
+ # (b) Freeze (scale, zero-point) after calibration
+ for l in model.model.layers:
+ l.freeze_qparams()
+
+ # (c) Register diff hooks and measure per-layer deltas
+ cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
+ # Use same inputs for comparison.
+ with torch.no_grad():
+ model(inputs)
+
+ assert isinstance(cmp_handles, list)
+ for h in cmp_handles:
+ h.remove()
+
+
+ if __name__ == "__main__":
+ try:
+ main()
+ except Exception as e:
+ print(f"\n[Error] {e}", file=sys.stderr)
+ sys.exit(1)
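
Note: the rewritten example is now a self-contained argparse CLI, so it can be launched as a module (for instance `python -m tico.experimental.quantization.ptq.examples.debug_quant_outputs --model <hf-repo-or-path>`) or driven programmatically. A minimal sketch of the latter, assuming the wheel is installed; the model id is taken from the removed hardcoded default and is only illustrative:

import sys

from tico.experimental.quantization.ptq.examples import debug_quant_outputs

# Flags mirror the argparse interface added in the diff above.
sys.argv = [
    "debug_quant_outputs",
    "--model", "meta-llama/Meta-Llama-3-1B",
    "--dtype", "float16",
    "--calib-preset", "baseline",
    "--no-tqdm",
]
debug_quant_outputs.main()  # runs calibration, then the per-layer FP-vs-UINT8 comparison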
tico/utils/signature.py CHANGED
@@ -141,22 +141,21 @@ class ModelInputSpec:
  args = flatten_and_convert_args(args)
  kwargs = flatten_and_convert_kwargs(kwargs)

+ arg_num = len(args) + len(kwargs)
+ m_input_num = len(self.names)
+ if arg_num != m_input_num:
+ raise ValueError(
+ f"Mismatch: number of model inputs and number of passed arguments are not the same: inputs({m_input_num}) != passed({arg_num}), input spec: {self.names}"
+ )
+
  # 1. positional arguments
  for i, val in enumerate(args):
- if i >= len(self.names):
- raise ValueError(f"Too many positional arguments ({i+1}).")
  name = self.names[i]
- if name in kwargs:
- raise TypeError(
- f"Got multiple values for argument '{name}' (positional and keyword)."
- )
  inputs.append(val)

  # 2. keyword arguments
  for idx in range(len(args), len(self.names)):
  name = self.names[idx]
- if name not in kwargs:
- raise ValueError(f"Missing argument for input '{name}'.")
  inputs.append(kwargs[name])

  if check:
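
The signature.py change replaces the per-argument checks ("Too many positional arguments", "Got multiple values", "Missing argument") with a single up-front count check. A standalone sketch of the new behavior; check_input_count is a hypothetical helper that mirrors the added hunk, not a function in tico:

def check_input_count(names, args, kwargs):
    # Same rule as the new code: the total number of passed inputs must equal
    # the number of flattened model input names.
    arg_num = len(args) + len(kwargs)
    m_input_num = len(names)
    if arg_num != m_input_num:
        raise ValueError(
            f"Mismatch: inputs({m_input_num}) != passed({arg_num}), input spec: {names}"
        )

names = ["input_ids", "attention_mask"]
check_input_count(names, ("ids",), {"attention_mask": "mask"})  # passes
try:
    check_input_count(names, ("ids",), {})  # old code: "Missing argument for input 'attention_mask'."
except ValueError as err:
    print(err)  # new code: single count-mismatch message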
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tico
- Version: 0.1.0.dev250915
+ Version: 0.1.0.dev250917
  Summary: Convert exported Torch module to circle
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -1,4 +1,4 @@
- tico/__init__.py,sha256=RT4YNN5EM4rbOVOWo1BHEO8vnfPWLEwVNjMFh3qRYeY,1883
+ tico/__init__.py,sha256=Da7Ln6MuWCBJXrjts6OsAslWSS79toVgPG2PITYPzE0,1883
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -62,8 +62,8 @@ tico/experimental/quantization/ptq/mode.py,sha256=lT-T8vIv8YWcwrjT7xXVhOw1g7aoAd
  tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXOq48uIqGy4TLDqGxY,1301
  tico/experimental/quantization/ptq/quant_config.py,sha256=nm7570Y1X2mOT_8s27ilWid04otor6cVTi9GwgAEaKc,4300
  tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
- tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=QWUuO50lITnooYqEe57VV6mvIHKWZMB_TOGvtZ8C8qQ,8238
- tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=astXzx-maq1W4gKvX2QaGmD2Tpmjunv4JqDYVk9eZRQ,5177
+ tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=eVQn8-M24QkoCy_FCBQLSlUrnyqUDSkvUFpUpEdpMx4,8265
+ tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=Hpx_jj46WISwdVp33NrIadizVAzf1nFTXuAcHsKEQuQ,8179
  tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
  tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
  tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
@@ -243,7 +243,7 @@ tico/utils/pytree_utils.py,sha256=jrk3N6X6LiUnBCX_gM1K9nywbVAJBVnszlTAgeIeDUc,52
  tico/utils/record_input.py,sha256=QN-8D71G_WAX3QQQ5CIwbEfFJZTQ3CvL4wCMiVddua4,3894
  tico/utils/register_custom_op.py,sha256=895SKZeXQzolK-mPG38cQC37Be76xUV_Ujw1k1ts9_w,28218
  tico/utils/serialize.py,sha256=mEuusEzi82WFsz3AkowgWwxSLeo50JDxyOj6yYDQhEI,1914
- tico/utils/signature.py,sha256=R2GV0alRpXEbZISqPKyxCUWbgDcsrQ2ovbVG3737IzA,9595
+ tico/utils/signature.py,sha256=3OOwyVJzfcGcgC0LiVmOcUIzfqSk27qoNHhkoCI7zPY,9530
  tico/utils/torch_compat.py,sha256=oc6PztVsXdHcQ3iaVR90wLLxrGaj6zFHWZ8K9rRS6q8,1795
  tico/utils/trace_decorators.py,sha256=ddLIiKQfSaQrxgF1kNpwjFTQnXENzeSfcr1kuAW4jGI,3221
  tico/utils/utils.py,sha256=aySftYnNTsqVAMcGs_3uX3-hz577a2cj4p1aVV-1XeQ,12747
@@ -252,9 +252,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
- tico-0.1.0.dev250915.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
- tico-0.1.0.dev250915.dist-info/METADATA,sha256=5l-EgJKZwF179QnVqWApKdARhQxw0c2iibtckWUu-XA,8450
- tico-0.1.0.dev250915.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- tico-0.1.0.dev250915.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
- tico-0.1.0.dev250915.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
- tico-0.1.0.dev250915.dist-info/RECORD,,
+ tico-0.1.0.dev250917.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+ tico-0.1.0.dev250917.dist-info/METADATA,sha256=WJdcwQ8suuOhdWCv9cW8_RW_qyckaOM5jEzlvi00vbM,8450
+ tico-0.1.0.dev250917.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ tico-0.1.0.dev250917.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+ tico-0.1.0.dev250917.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+ tico-0.1.0.dev250917.dist-info/RECORD,,