PyPI - tico - Versions diffs - 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl - Mend

tico 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of tico might be problematic. Click here for more details.

Files changed (114) hide show

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py RENAMED Viewed

@@ -17,13 +17,12 @@ from typing import Optional
 import torch.nn as nn
 import torch.nn.functional as F
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.qscheme import QScheme
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
-    QuantModuleBase,
-)
-from tico.experimental.quantization.ptq.wrappers.registry import register
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.qscheme import QScheme
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
+from tico.quantization.wrapq.wrappers.registry import register
 @register(nn.Linear)
@@ -34,7 +33,7 @@ class QuantLinear(QuantModuleBase):
         self,
         fp: nn.Linear,
         *,
-        qcfg: Optional[QuantConfig] = None,
+        qcfg: Optional[PTQConfig] = None,
         fp_name: Optional[str] = None
     ):
         super().__init__(qcfg, fp_name=fp_name)

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py RENAMED Viewed

@@ -17,18 +17,17 @@ from typing import Optional
 import torch
 import torch.nn as nn
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
-    QuantModuleBase,
-)
-from tico.experimental.quantization.ptq.wrappers.registry import register
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
+from tico.quantization.wrapq.wrappers.registry import try_register
-@register(nn.SiLU)
+@try_register("torch.nn.SiLU", "transformers.activations.SiLUActivation")
 class QuantSiLU(QuantModuleBase):
     """
-    QuantSiLU — drop-in replacement for nn.SiLU that quantizes
-    both intermediate tensors:
+    QuantSiLU — drop-in quantized implementation of the SiLU operation.
+    This module quantizes both intermediate tensors:
         • s  = sigmoid(x)   (logistic)
         • y  = x * s        (mul)
     """
@@ -37,7 +36,7 @@ class QuantSiLU(QuantModuleBase):
         self,
         fp: nn.SiLU,
         *,
-        qcfg: Optional[QuantConfig] = None,
+        qcfg: Optional[PTQConfig] = None,
         fp_name: Optional[str] = None
     ):
         super().__init__(qcfg, fp_name=fp_name)

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py RENAMED Viewed

@@ -16,11 +16,9 @@ from typing import Optional
 import torch
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
-    QuantModuleBase,
-)
-from tico.experimental.quantization.ptq.wrappers.registry import lookup
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
+from tico.quantization.wrapq.wrappers.registry import lookup
 class PTQWrapper(QuantModuleBase):
@@ -34,7 +32,7 @@ class PTQWrapper(QuantModuleBase):
     def __init__(
         self,
         module: torch.nn.Module,
-        qcfg: Optional[QuantConfig] = None,
+        qcfg: Optional[PTQConfig] = None,
         *,
         fp_name: Optional[str] = None,
     ):

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py RENAMED Viewed

@@ -12,16 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Callable, Optional
+from typing import Any, Optional
 import torch
 import torch.nn as nn
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
-    QuantModuleBase,
-)
-from tico.experimental.quantization.ptq.wrappers.registry import register
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
+from tico.quantization.wrapq.wrappers.registry import register
 class QuantElementwise(QuantModuleBase):
@@ -33,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
     """
     # subclass must set this
-    FUNC: Callable[[torch.Tensor], torch.Tensor] | None = None
+    FUNC: Any = None
     def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
@@ -48,7 +46,7 @@ class QuantElementwise(QuantModuleBase):
         self,
         fp_module: nn.Module,
         *,
-        qcfg: Optional[QuantConfig] = None,
+        qcfg: Optional[PTQConfig] = None,
         fp_name: Optional[str] = None,
     ):
         super().__init__(qcfg, fp_name=fp_name)
@@ -70,7 +68,7 @@ class QuantElementwise(QuantModuleBase):
 """
-Why `FUNC` is a `staticmethod`
+Q1) Why `FUNC` is a `staticmethod`
 - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
   so the callable keeps the expected signature `Tensor -> Tensor`
@@ -87,27 +85,67 @@ Why `FUNC` is a `staticmethod`
   than an `nn.Module` instance that would appear in the module tree.
 - Small perf/alloc win: no bound-method objects are created on each call.
+Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
+- torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
+  Their type is `builtin_function_or_method`, not a Python `FunctionType`.
+  This causes `torch.export` (and FX tracing) to fail with:
+    "expected FunctionType, found builtin_function_or_method".
+- By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
+  we convert it into a normal Python function object (`FunctionType`),
+  which satisfies export/tracing requirements.
+- Functionally, this adds zero overhead and preserves semantics,
+  but makes the callable introspectable (has __code__, __name__, etc.)
+  and compatible with TorchDynamo / FX graph capture.
+- It also keeps FUNC pure and stateless, ensuring the elementwise op
+  is represented as `call_function(_tanh)` in the traced graph
+  rather than a bound `call_method` or module attribute access.
 """
-# Sigmoid
+def _relu(x: torch.Tensor) -> torch.Tensor:
+    return torch.relu(x)
+def _tanh(x: torch.Tensor) -> torch.Tensor:
+    return torch.tanh(x)
+def _sigmoid(x: torch.Tensor) -> torch.Tensor:
+    return torch.sigmoid(x)
+def _gelu(x: torch.Tensor) -> torch.Tensor:
+    return torch.nn.functional.gelu(x)
 @register(nn.Sigmoid)
 class QuantSigmoid(QuantElementwise):
-    FUNC = staticmethod(torch.sigmoid)
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _sigmoid(x)
-# Tanh
 @register(nn.Tanh)
 class QuantTanh(QuantElementwise):
-    FUNC = staticmethod(torch.tanh)
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _tanh(x)
-# ReLU
 @register(nn.ReLU)
 class QuantReLU(QuantElementwise):
-    FUNC = staticmethod(torch.relu)
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _relu(x)
-# GELU (approximate)
 @register(nn.GELU)
 class QuantGELU(QuantElementwise):
-    FUNC = staticmethod(torch.nn.functional.gelu)
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _gelu(x)

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py RENAMED Viewed

@@ -17,9 +17,10 @@ from typing import Iterable, Optional, Tuple
 import torch.nn as nn
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.observers.base import ObserverBase
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.observers.base import ObserverBase
 class QuantModuleBase(nn.Module, ABC):
@@ -29,7 +30,7 @@ class QuantModuleBase(nn.Module, ABC):
     Responsibilities
     ----------------
     • Own *one* Mode enum (`NO_QUANT / CALIB / QUANT`)
-    • Own a QuantConfig describing default / per-observer dtypes
+    • Own a PTQConfig describing default / per-observer dtypes
     • Expose a canonical lifecycle:
           enable_calibration()
           freeze_qparams()
@@ -38,10 +39,10 @@ class QuantModuleBase(nn.Module, ABC):
     """
     def __init__(
-        self, qcfg: Optional[QuantConfig] = None, *, fp_name: Optional[str] = None
+        self, qcfg: Optional[PTQConfig] = None, *, fp_name: Optional[str] = None
     ) -> None:
         super().__init__()
-        self.qcfg = qcfg or QuantConfig()
+        self.qcfg = qcfg or PTQConfig()
         self._mode: Mode = Mode.NO_QUANT  # default state
         self.fp_name = fp_name
@@ -118,9 +119,9 @@ class QuantModuleBase(nn.Module, ABC):
         Instantiate an observer named *name*.
         Precedence (3-tier) for keys:
-           • observer:  user > wrapper-default > QuantConfig.default_observer
-           • dtype:     user > wrapper-default > QuantConfig.default_dtype
-           • qscheme:   user > wrapper-default > QuantConfig.default_qscheme
+           • observer:  user > wrapper-default > PTQConfig.default_observer
+           • dtype:     user > wrapper-default > PTQConfig.default_dtype
+           • qscheme:   user > wrapper-default > PTQConfig.default_qscheme
         Other kwargs (e.g., channel_axis) remain:
            user override > wrapper-default

tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py RENAMED Viewed

@@ -17,26 +17,27 @@ from typing import Callable, Dict, Type
 import torch.nn as nn
-from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
-    QuantModuleBase,
-)
+from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
 _WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
 _IMPORT_ONCE = False
 _CORE_MODULES = (
-    "tico.experimental.quantization.ptq.wrappers.quant_elementwise",
-    "tico.experimental.quantization.ptq.wrappers.nn.quant_layernorm",
-    "tico.experimental.quantization.ptq.wrappers.nn.quant_linear",
-    "tico.experimental.quantization.ptq.wrappers.nn.quant_silu",
-    # llama
-    "tico.experimental.quantization.ptq.wrappers.llama.quant_attn",
-    "tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer",
-    "tico.experimental.quantization.ptq.wrappers.llama.quant_mlp",
-    # fairseq
-    "tico.experimental.quantization.ptq.wrappers.fairseq.quant_decoder_layer",
-    "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder",
-    "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder_layer",
-    "tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha",
+    "tico.quantization.wrapq.wrappers.quant_elementwise",
+    ## nn ##
+    "tico.quantization.wrapq.wrappers.nn.quant_layernorm",
+    "tico.quantization.wrapq.wrappers.nn.quant_linear",
+    # This includes not only `nn.SiLU` but also `SiLUActivation` from transformers,
+    # as they are the same operation.
+    "tico.quantization.wrapq.wrappers.nn.quant_silu",
+    ## llama ##
+    "tico.quantization.wrapq.wrappers.llama.quant_attn",
+    "tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
+    "tico.quantization.wrapq.wrappers.llama.quant_mlp",
+    ## fairseq ##
+    "tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
+    "tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
+    "tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
+    "tico.quantization.wrapq.wrappers.fairseq.quant_mha",
     # add future core wrappers here
 )

tico/utils/convert.py CHANGED Viewed

@@ -20,20 +20,6 @@ import torch
 from torch.export import export, ExportedProgram
 from tico.config import CompileConfigBase, get_default_config
-from tico.experimental.quantization.passes.fold_quant_ops import FoldQuantOps
-from tico.experimental.quantization.passes.insert_quantize_on_dtype_mismatch import (
-    InsertQuantizeOnDtypeMismatch,
-)
-from tico.experimental.quantization.passes.propagate_qparam_backward import (
-    PropagateQParamBackward,
-)
-from tico.experimental.quantization.passes.propagate_qparam_forward import (
-    PropagateQParamForward,
-)
-from tico.experimental.quantization.passes.quantize_bias import QuantizeBias
-from tico.experimental.quantization.passes.remove_weight_dequant_op import (
-    RemoveWeightDequantOp,
-)
 from tico.passes.cast_aten_where_arg_type import CastATenWhereArgType
 from tico.passes.cast_clamp_mixed_type_args import CastClampMixedTypeArgs
 from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
@@ -74,6 +60,14 @@ from tico.passes.remove_redundant_slice import RemoveRedundantSlice
 from tico.passes.remove_redundant_to_copy import RemoveRedundantToCopy
 from tico.passes.restore_linear import RestoreLinear
 from tico.passes.segment_index_select import SegmentIndexSelectConst
+from tico.quantization.passes.fold_quant_ops import FoldQuantOps
+from tico.quantization.passes.insert_quantize_on_dtype_mismatch import (
+    InsertQuantizeOnDtypeMismatch,
+)
+from tico.quantization.passes.propagate_qparam_backward import PropagateQParamBackward
+from tico.quantization.passes.propagate_qparam_forward import PropagateQParamForward
+from tico.quantization.passes.quantize_bias import QuantizeBias
+from tico.quantization.passes.remove_weight_dequant_op import RemoveWeightDequantOp
 from tico.serialize.circle_serializer import build_circle
 from tico.serialize.operators.node_visitor import get_support_targets
 from tico.utils import logging
@@ -143,6 +137,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
         or torch.__version__.startswith("2.7")
         or torch.__version__.startswith("2.8")
         or torch.__version__.startswith("2.9")
+        or torch.__version__.startswith("2.10")
     ):
         return run_decompositions(exported_program)
     else:

{tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tico
-Version: 0.1.0.dev250924
+Version: 0.1.0.dev251109
 Summary: Convert exported Torch module to circle
 Home-page: UNKNOWN
 License: UNKNOWN
@@ -30,6 +30,7 @@ designed for optimized on-device neural network inference.
   - [From torch module](#from-torch-module)
   - [From .pt2](#from-pt2)
   - [Running circle models directly in Python](#running-circle-models-directly-in-python)
+  - [Quantization](#quantization)
 ### For Developers
@@ -68,7 +69,7 @@ This will generate `build` and `dist` directories in the root directory.
 **Available options**
 - `--dist` To install the package from .whl (without this option, _TICO_ is installed in an editable mode)
 - `--torch_ver <torch version>` To install a specific torch version (default: 2.6).
-  - Available <torch version>: 2.5, 2.6, 2.7, nightly
+  - Available <torch version>: 2.5, 2.6, 2.7, 2.8, nightly
 4. Now you can convert a torch module to a `.circle`.
@@ -188,6 +189,48 @@ circle_model(*example_inputs)
 # numpy.ndarray([2., 2., 2., 2.], dtype=float32)
 ```
+### Quantization
+The `tico.quantization` module provides a unified and modular interface for quantizing
+ large language models (LLMs) and other neural networks.
+It introduces a simple two-step workflow — **prepare** and **convert** — that
+ abstracts the details of different quantization algorithms.
+#### Basic Usage
+```python
+from tico.quantization import prepare, convert
+from tico.quantization.config.gptq import GPTQConfig
+import torch
+import torch.nn as nn
+class LinearModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(8, 8)
+    def forward(self, x):
+        return self.linear(x)
+model = LinearModel().eval()
+# 1. Prepare for quantization
+quant_config = GPTQConfig()
+prepared_model = prepare(model, quant_config)
+# 2. Calibration (`dataset` is a user-provided iterable of calibration inputs)
+for d in dataset:
+    prepared_model(d)
+# 3. Apply GPTQ
+quantized_model = convert(prepared_model, quant_config)
+```
+For detailed documentation, design notes, and contributing guidelines,
+see [tico/quantization/README.md](./tico/quantization/README.md).
 ## For Developers
 ### Testing & Code Formatting
@@ -276,6 +319,9 @@ If you want to test them locally, you can do so by navigating to each model dire
 $ pip install -r test/modules/model/<model_name>/requirements.txt
 # Run test for a single model
 $ ./ccex test -m <model_name>
+# Run models whose names start with "Llama" (e.g., Llama, LlamaDecoderLayer, LlamaWithGQA)
+# Note that you should quote the wildcard (*) pattern so that the shell does not expand it
+$ ./ccex test -m "Llama*"
 ```
 For example, to run a single model

tico 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl

Potentially problematic release.

tico 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl