PyPI - tico - Versions diffs - 0.1.0.dev250901__py3-none-any.whl → 0.1.0.dev250902__py3-none-any.whl - Mend

tico 0.1.0.dev250901__py3-none-any.whl → 0.1.0.dev250902__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

tico/__init__.py CHANGED Viewed

@@ -29,7 +29,7 @@ __all__ = [
 ]
 # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
-__version__ = "0.1.0.dev250901"
+__version__ = "0.1.0.dev250902"
 MINIMUM_SUPPORTED_VERSION = "2.5.0"
 SECURE_TORCH_VERSION = "2.6.0"

tico/experimental/quantization/ptq/examples/compare_ppl.py ADDED Viewed

@@ -0,0 +1,121 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+# QUICK PTQ WORKFLOW (OPTIONAL FP32 BASELINE)
+# -----------------------------------------------------------------------------
+# Toggle RUN_FP to choose between:
+#   • FP32 perplexity measurement only, OR
+#   • Full post-training UINT-8 flow (wrap → calibrate → eval).
+# =============================================================================
+import torch
+import tqdm
+from datasets import load_dataset
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from tico.experimental.quantization.ptq.quant_config import QuantConfig
+from tico.experimental.quantization.ptq.utils import perplexity
+from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+# -------------------------------------------------------------------------
+# 0. Global configuration
+# -------------------------------------------------------------------------
+# NOTE(review): confirm that this checkpoint id is available on the Hugging
+# Face Hub for your account before running; it is left unchanged here.
+MODEL_NAME = "meta-llama/Meta-Llama-3-1B"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+STRIDE = 512  # sliding-window stride for perplexity
+RUN_FP = True  # set False → run UINT-8 path
+# Token-budget presets for activation calibration
+TOKENS: dict[str, int] = {
+    # Smoke test (<1 min turnaround on CPU/GPU)
+    "debug": 2_000,  # ≈16 × 128-seq batches
+    # Good default for 1-7B models (≲3 % ppl delta)
+    "baseline": 50_000,
+    # Production / 4-bit observer smoothing
+    "production": 200_000,
+}
+CALIB_TOKENS = TOKENS["baseline"]
+if not RUN_FP:
+    # Calibration only runs on the UINT-8 path, so only announce it there.
+    print(f"Calibrating with {CALIB_TOKENS:,} tokens.\n")
+# -------------------------------------------------------------------------
+# 1. Load model
+# -------------------------------------------------------------------------
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+if RUN_FP:
+    # -- FP32 baseline ------------------------------------------------------
+    print("Loading FP32 model …")
+    fp_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE).eval()
+    # The KV cache is not needed for scoring full windows.
+    fp_model.config.use_cache = False
+else:
+    # -- UINT-8 pipeline -----------------------------------------------------
+    print("Creating UINT-8 clone …")
+    uint8_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE).eval()
+    uint8_model.config.use_cache = False
+    # ---------------------------------------------------------------------
+    # 2. Wrap every Transformer layer with PTQWrapper
+    # ---------------------------------------------------------------------
+    qcfg = QuantConfig()  # all-uint8 defaults
+    wrapped_layers = torch.nn.ModuleList()
+    for idx, layer in enumerate(uint8_model.model.layers):
+        # Each decoder layer gets its own child config, keyed by its index.
+        layer_cfg = qcfg.child(f"layer{idx}")
+        wrapped_layers.append(PTQWrapper(layer, qcfg=layer_cfg))
+    uint8_model.model.layers = wrapped_layers
+    # ---------------------------------------------------------------------
+    # 3. Single-pass activation calibration
+    # ---------------------------------------------------------------------
+    print("Calibrating UINT-8 observers …")
+    calib_txt = " ".join(
+        load_dataset("wikitext", "wikitext-2-raw-v1", split="train")["text"]
+    )
+    # Tokenise first, THEN truncate: CALIB_TOKENS is a budget in tokens, so
+    # slicing the raw string would cut by characters and calibrate on fewer
+    # tokens than the preset promises.
+    ids = tokenizer(calib_txt, return_tensors="pt").input_ids[:, :CALIB_TOKENS]
+    ids = ids.to(DEVICE)
+    # (a) switch every QuantModuleBase to CALIB mode
+    for wrapped in uint8_model.model.layers:
+        wrapped.enable_calibration()
+    # (b) run inference to collect ranges
+    with torch.no_grad():
+        for i in tqdm.trange(0, ids.size(1) - 1, STRIDE, desc="Calibration"):
+            uint8_model(ids[:, i : i + STRIDE])
+    # (c) freeze (scale, zero-point)
+    for wrapped in uint8_model.model.layers:
+        wrapped.freeze_qparams()
+# -------------------------------------------------------------------------
+# 4. Evaluate perplexity on Wikitext-2
+# -------------------------------------------------------------------------
+print("\nCalculating perplexities …")
+test_ds = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
+enc = tokenizer("\n\n".join(test_ds["text"]), return_tensors="pt")
+if RUN_FP:
+    ppl_fp = perplexity(fp_model, enc, DEVICE, stride=STRIDE)
+else:
+    ppl_int8 = perplexity(uint8_model, enc, DEVICE, stride=STRIDE)
+# -------------------------------------------------------------------------
+# 5. Report
+# -------------------------------------------------------------------------
+print("\n┌── Wikitext-2 test perplexity ─────────────")
+if RUN_FP:
+    print(f"│ FP32  : {ppl_fp:8.2f}")
+else:
+    print(f"│ UINT-8 : {ppl_int8:8.2f}")
+print("└───────────────────────────────────────────")

tico/experimental/quantization/ptq/utils/__init__.py CHANGED Viewed

@@ -1,5 +1,7 @@
+from tico.experimental.quantization.ptq.utils.metrics import perplexity
 from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
 __all__ = [
     "channelwise_minmax",
+    "perplexity",
 ]

tico/experimental/quantization/ptq/utils/metrics.py ADDED Viewed

@@ -0,0 +1,123 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional
+import torch
+import tqdm
+def perplexity(
+    model: torch.nn.Module,
+    encodings: torch.Tensor,
+    device: torch.device | str,
+    *,
+    max_length: Optional[int] = None,
+    stride: int = 512,
+    ignore_index: int | None = -100,
+    show_progress: bool = True,
+) -> float:
+    """
+    Compute corpus-level perplexity (PPL) with a strided sliding window.
+    The function:
+    1. Slides a window of at most `max_length` tokens over the corpus,
+       advancing by `stride` tokens each step (windows overlap whenever
+       `stride < max_length`).
+    2. Masks labels that were already covered by an earlier window with
+       `ignore_index`, so each token's negative log-likelihood (NLL) is
+       counted at most once.
+    3. Re-weights every window's loss by its number of scored labels and
+       returns `exp(total_nll / total_scored_labels)`.
+    The label at position 0 of a window is never counted (labels are
+    counted from position 1 onwards), so with `stride == max_length` the
+    first token of every window goes unscored.
+    Parameters
+    ----------
+    model : torch.nn.Module
+        Causal LM, expected to be in evaluation mode.  It is called as
+        `model(input_ids, labels=...)` and must return an object whose
+        `.loss` is the MEAN NLL over non-ignored labels (the Hugging Face
+        causal-LM convention); the re-weighting relies on that.
+    encodings : torch.Tensor | transformers.BatchEncoding
+        Tokenised corpus.  If the object has an `.input_ids` attribute it is
+        used, otherwise the object itself must be a tensor.  Shape must be
+        `(1, seq_len)`.
+    device : torch.device | str
+        Device the token ids are moved to before evaluation.
+    max_length : int, optional
+        Context window size.  Defaults to `model.config.max_position_embeddings`.
+    stride : int, default = 512
+        Step size by which the sliding window advances.  Must satisfy
+        `1 ≤ stride ≤ max_length`.
+    ignore_index : int, default = -100
+        Label value to ignore in loss computation.  This should match the
+        `ignore_index` used by the model's loss.  `None` is treated as
+        `-100`.
+    show_progress : bool, default = True
+        If True, displays a tqdm progress bar while evaluating.
+    Returns
+    -------
+    float
+        Corpus-level perplexity.
+    Raises
+    ------
+    ValueError
+        If the corpus has fewer than two tokens or no label was scored.
+    AssertionError
+        If the input is not a tensor, `max_length` cannot be inferred from
+        the model config, or `stride` is out of range.
+    """
+    # -------- input preparation -------- #
+    # transformers.BatchEncoding exposes `input_ids`; a plain tensor does not.
+    input_ids_full = getattr(encodings, "input_ids", encodings)
+    assert isinstance(input_ids_full, torch.Tensor)
+    input_ids_full = input_ids_full.to(device)
+    if ignore_index is None:
+        # The signature allows None, but a tensor cannot be filled with None.
+        ignore_index = -100
+    if max_length is None:
+        assert hasattr(model, "config")
+        assert hasattr(model.config, "max_position_embeddings")
+        assert isinstance(model.config.max_position_embeddings, int)
+        max_length = model.config.max_position_embeddings
+    assert max_length is not None
+    assert (
+        1 <= stride <= max_length
+    ), f"stride ({stride}) must be in [1, max_length ({max_length})]"
+    seq_len = input_ids_full.size(1)
+    if seq_len < 2:
+        # Next-token prediction needs a context token and a target token.
+        raise ValueError(
+            f"perplexity needs at least 2 tokens, got sequence length {seq_len}"
+        )
+    # Accumulate as a Python float so the running sum does not depend on
+    # the dtype of the loss tensor.
+    nll_sum = 0.0
+    n_tokens = 0
+    prev_end = 0
+    # -------- main loop -------- #
+    for begin in tqdm.trange(0, seq_len, stride, desc="PPL", disable=not show_progress):
+        end = min(begin + max_length, seq_len)
+        trg_len = end - prev_end  # fresh tokens in this window
+        input_ids = input_ids_full[:, begin:end]
+        target_ids = input_ids.clone()
+        target_ids[:, :-trg_len] = ignore_index  # mask previously-scored tokens
+        # exact number of labels that contribute to the loss (labels are
+        # counted from position 1, so position 0 is never scored)
+        loss_tokens = int((target_ids[:, 1:] != ignore_index).sum().item())
+        # A window with no scored label (e.g. a trailing one-token window
+        # when stride == max_length) is skipped: a mean loss over zero
+        # labels is expected to be NaN, and NaN * 0 would poison the sum.
+        if loss_tokens > 0:
+            with torch.no_grad():
+                outputs = model(input_ids, labels=target_ids)
+            # loss is assumed to be averaged over non-masked labels, so
+            # scale it back up to a summed NLL
+            nll_sum += float(outputs.loss) * loss_tokens
+            n_tokens += loss_tokens
+        prev_end = end
+        if end == seq_len:
+            break
+    if n_tokens == 0:
+        raise ValueError("no labels were scored; cannot compute perplexity")
+    return torch.exp(torch.tensor(nll_sum / n_tokens)).item()

tico/passes/remove_redundant_assert_nodes.py CHANGED Viewed

@@ -21,7 +21,9 @@ from tico.utils.utils import is_target_node
 assert_node_targets = [
+    torch.ops.aten._assert_scalar.default,
     torch.ops.aten._assert_tensor_metadata.default,
+    torch.ops.aten.sym_constrain_range_for_size.default,  # Related to symbolic shape validation
 ]
@@ -29,7 +31,7 @@ assert_node_targets = [
 class RemoveRedundantAssertionNodes(PassBase):
     """
     This removes redundant assertion nodes.
-    - `aten.assert_tensor_meta.default`
+    When assertion node is erased, related comparison nodes are also removed by graph.eliminate_dead_code().
     """
     def __init__(self):

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tico
-Version: 0.1.0.dev250901
+Version: 0.1.0.dev250902
 Summary: Convert exported Torch module to circle
 Home-page: UNKNOWN
 License: UNKNOWN

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-tico/__init__.py,sha256=MgvCVXWMpNL2dxPn54C8fdQaTJPdtHivhuNHH4qN5R8,1883
+tico/__init__.py,sha256=PXZzhb0ZexNIwGhVJpg4Ln_RqskbSIMigqj0GdZgbeA,1883
 tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
 tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
 tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -62,6 +62,7 @@ tico/experimental/quantization/ptq/mode.py,sha256=lT-T8vIv8YWcwrjT7xXVhOw1g7aoAd
 tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXOq48uIqGy4TLDqGxY,1301
 tico/experimental/quantization/ptq/quant_config.py,sha256=nm7570Y1X2mOT_8s27ilWid04otor6cVTi9GwgAEaKc,4300
 tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=ODaRB234iy2dFfGIBd-OtKxdSzxnIbgKZkQ_o30tSts,5287
 tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
 tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
 tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
@@ -73,7 +74,8 @@ tico/experimental/quantization/ptq/observers/ema.py,sha256=MAMdBmjVNMg_vsqXrcBzb
 tico/experimental/quantization/ptq/observers/identity.py,sha256=vkec8Or-7VwM4zkFEvEKROQJk8XEHMVX8mBNDnxSyS8,2591
 tico/experimental/quantization/ptq/observers/minmax.py,sha256=mLHkwIzWFzQXev7EU7w1333KckwRjukc3_cUPJOnUfs,1486
 tico/experimental/quantization/ptq/observers/mx.py,sha256=aP4qmBgeiRIYZJksShN5gs6UyYOFi2-Sbk5k5xvPQ4w,1863
-tico/experimental/quantization/ptq/utils/__init__.py,sha256=PL9IZgiWoMtsXVljeOy7KymmLVP238SXEFRLXYK72WQ,126
+tico/experimental/quantization/ptq/utils/__init__.py,sha256=MrQwMbbKS0dJrO8jsceCai4Z59iKQNpTPZND3GN6TrM,216
+tico/experimental/quantization/ptq/utils/metrics.py,sha256=EW_FQmJrl9Y4esspZQ0GHfJ58RwuJUz0l8IfYq3NWY4,4461
 tico/experimental/quantization/ptq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
 tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
@@ -119,7 +121,7 @@ tico/passes/lower_to_slice.py,sha256=OzlFzK3lBYyYwC3WThsWd94Ob4JINIJF8UaLAtnumzU
 tico/passes/merge_consecutive_cat.py,sha256=ayZNLDA1DFM7Fxxi2Dmk1CujkgUuaVCH1rhQgLrvvOQ,2701
 tico/passes/ops.py,sha256=cSj3Sk2x2cOE9b8oU5pmSa_rHr-iX2lORzu3N_UHMSQ,2967
 tico/passes/remove_nop.py,sha256=Hf91p_EJAOC6DyWNthash0_UWtEcNc_M7znamQfYQ5Y,2686
-tico/passes/remove_redundant_assert_nodes.py,sha256=IONd3xBy6I8tH6_Y1eN3_eCHH7WTC8soBgjXzOju9cQ,1612
+tico/passes/remove_redundant_assert_nodes.py,sha256=rYbTCyuNIXIC-2NreHKBVCuaSUkEQvB_iSRzb26P_EA,1821
 tico/passes/remove_redundant_expand.py,sha256=auyqIoQT4HJhiJfuUe6BrEtUhvz221ohnIK5EuszWeg,2112
 tico/passes/remove_redundant_permute.py,sha256=98UsaZzFZdQzEEAR1pIzRisAf6hgfXLa88aayjalt3E,4292
 tico/passes/remove_redundant_reshape.py,sha256=aeep6LDvY58GEuOrWckkEXnJa6wkkbiJ9FrimT9F3-s,16384
@@ -242,9 +244,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
 tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
 tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
-tico-0.1.0.dev250901.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
-tico-0.1.0.dev250901.dist-info/METADATA,sha256=LMgoYoHYFT8cJU9VNYiiX89tMSxEX30x17x_6eWAr4o,8450
-tico-0.1.0.dev250901.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-tico-0.1.0.dev250901.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
-tico-0.1.0.dev250901.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
-tico-0.1.0.dev250901.dist-info/RECORD,,
+tico-0.1.0.dev250902.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+tico-0.1.0.dev250902.dist-info/METADATA,sha256=CePT5yw5-ln0-Ct8n61iGDnFfnoASlqAfPQmxRQ9QQ0,8450
+tico-0.1.0.dev250902.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+tico-0.1.0.dev250902.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+tico-0.1.0.dev250902.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+tico-0.1.0.dev250902.dist-info/RECORD,,

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/LICENSE RENAMED Viewed

File without changes

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/WHEEL RENAMED Viewed

File without changes

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{tico-0.1.0.dev250901.dist-info → tico-0.1.0.dev250902.dist-info}/top_level.txt RENAMED Viewed

File without changes

tico 0.1.0.dev250901__py3-none-any.whl → 0.1.0.dev250902__py3-none-any.whl

tico 0.1.0.dev250901__py3-none-any.whl → 0.1.0.dev250902__py3-none-any.whl