tico 0.1.0.dev251106__py3-none-any.whl → 0.2.0.dev260122__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tico/__init__.py +2 -2
- tico/_version.py +1 -0
- tico/passes/convert_conv3d_to_conv2d.py +435 -0
- tico/passes/convert_sym_size_to_circle_shape.py +99 -0
- tico/passes/decompose_batch_norm.py +9 -5
- tico/passes/lower_copy.py +95 -0
- tico/passes/ops.py +4 -0
- tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +251 -0
- tico/quantization/algorithm/fpi_gptq/quantizer.py +180 -0
- tico/quantization/algorithm/gptq/gptq.py +231 -11
- tico/quantization/algorithm/gptq/quantizer.py +18 -6
- tico/quantization/config/{pt2e.py → fpi_gptq.py} +11 -4
- tico/quantization/config/gptq.py +27 -4
- tico/quantization/public_interface.py +0 -10
- tico/quantization/wrapq/quantizer.py +2 -0
- tico/quantization/wrapq/wrappers/quant_elementwise.py +51 -11
- tico/serialize/operators/adapters/onert/llama_attention.py +51 -0
- tico/serialize/operators/op_attention.py +58 -0
- tico/serialize/operators/op_circle_shape.py +64 -0
- tico/serialize/operators/op_dequantize_per_channel.py +1 -0
- tico/serialize/operators/op_dequantize_per_tensor.py +1 -0
- tico/serialize/operators/op_transpose_conv.py +66 -50
- tico/utils/convert.py +16 -1
- tico/utils/padding.py +13 -5
- tico/utils/record_input.py +2 -2
- tico/utils/register_custom_op.py +63 -0
- tico/utils/validate_args_kwargs.py +49 -4
- tico-0.2.0.dev260122.dist-info/METADATA +631 -0
- {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/RECORD +35 -46
- {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/WHEEL +1 -1
- {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/entry_points.txt +0 -1
- tico/quantization/algorithm/pt2e/annotation/annotator.py +0 -208
- tico/quantization/algorithm/pt2e/annotation/config.py +0 -26
- tico/quantization/algorithm/pt2e/annotation/op/__init__.py +0 -21
- tico/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +0 -63
- tico/quantization/algorithm/pt2e/annotation/op/add.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/conv2d.py +0 -90
- tico/quantization/algorithm/pt2e/annotation/op/div.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/linear.py +0 -92
- tico/quantization/algorithm/pt2e/annotation/op/mean.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/mul.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/relu6.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/rsqrt.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/sub.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/spec.py +0 -45
- tico/quantization/algorithm/pt2e/annotation/utils.py +0 -88
- tico/quantization/algorithm/pt2e/quantizer.py +0 -81
- tico/quantization/algorithm/pt2e/transformation/__init__.py +0 -1
- tico/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -58
- tico/quantization/algorithm/pt2e/utils.py +0 -135
- tico/serialize/operators/op_copy.py +0 -187
- tico-0.1.0.dev251106.dist-info/METADATA +0 -392
- /tico/quantization/algorithm/{pt2e → fpi_gptq}/__init__.py +0 -0
- /tico/{quantization/algorithm/pt2e/annotation → serialize/operators/adapters/onert}/__init__.py +0 -0
- {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info/licenses}/LICENSE +0 -0
- {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/top_level.txt +0 -0
tico/quantization/algorithm/gptq/gptq.py
CHANGED
@@ -31,16 +31,147 @@ torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cudnn.allow_tf32 = False
 
 
+def convtranspose2d_weights_to_conv2d_weights(layer, w) -> torch.Tensor:
+    if layer.groups == 1:
+        # the last two dimensions of w is (k_h, k_w) to get equivalent Conv2D we need to flip them to get `w_conv2D_equivalent_to_w[i, j] = w_conv[k_h - i - 1, k_w - j - 1]`
+        # the first two dimensions of w is (input_channels, output_channels), so we need to transpose them as Conv2D weights should be in the (output_channels, input_channels) form
+        # please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L1059-L1061 for additional info
+        w_conv_transposed = w.transpose(1, 0).flip((-2, -1))
+    else:
+        # basically it's the same as for `layer.groups == 1` but groupwise
+        in_channels, out_channels, kernel_h, kernel_w = layer.weight.shape
+        out_channels *= layer.groups
+        w_conv_transposed = torch.zeros(
+            out_channels, in_channels // layer.groups, kernel_h, kernel_w
+        )
+        for i in range(0, layer.groups):
+            w_conv_transposed[
+                i
+                * out_channels
+                // layer.groups : (i + 1)
+                * out_channels
+                // layer.groups,
+                :,
+                :,
+                :,
+            ] = (
+                w[
+                    i
+                    * in_channels
+                    // layer.groups : (i + 1)
+                    * in_channels
+                    // layer.groups,
+                    :,
+                    :,
+                    :,
+                ]
+                .transpose(1, 0)
+                .flip((-2, -1))
+            )
+
+    return w_conv_transposed
+
+
+def conv2d_weights_to_convtranspose2d_weights(orig_layer, w) -> torch.Tensor:
+    # this is just an inverse of convtranspose2d_weights_to_conv2d_weights
+    if orig_layer.groups > 1:
+        in_channels, out_channels, _, _ = orig_layer.weight.shape
+        out_channels *= orig_layer.groups
+        w_conv_transposed = torch.zeros_like(orig_layer.weight)
+        for i in range(0, orig_layer.groups):
+            w_conv_transposed[
+                i
+                * in_channels
+                // orig_layer.groups : (i + 1)
+                * in_channels
+                // orig_layer.groups,
+                :,
+                :,
+                :,
+            ] = (
+                w[
+                    i
+                    * out_channels
+                    // orig_layer.groups : (i + 1)
+                    * out_channels
+                    // orig_layer.groups,
+                    :,
+                    :,
+                    :,
+                ]
+                .transpose(1, 0)
+                .flip((-2, -1))
+            )
+    else:
+        w_conv_transposed = w.transpose(1, 0).flip((-2, -1))
+
+    return w_conv_transposed
+
+
+def get_matmul_input_for_convtranspose2d(layer, inp):
+    # Please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L996-L998 for padding
+    strided_pad = (
+        layer.dilation[0] * (layer.kernel_size[0] - 1) - layer.padding[0],
+        layer.dilation[1] * (layer.kernel_size[1] - 1) - layer.padding[1],
+    )
+
+    # interleave input with zero rows and columns according to stride
+    # Please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L991-L994 for more info
+    inp_strided = torch.zeros(
+        inp.shape[0],
+        inp.shape[1],
+        layer.stride[0] * (inp.shape[2] - 1) + 2 * strided_pad[0] + 1,
+        layer.stride[1] * (inp.shape[3] - 1) + 2 * strided_pad[1] + 1,
+        device=inp.device,
+    )
+
+    indices = torch.arange(0, inp.shape[2], device=inp.device)
+    # insert original input values according to stride to meet https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L991-L994
+    inp_strided[
+        :,
+        :,
+        layer.stride[0] * indices + strided_pad[0],
+        strided_pad[1] : -strided_pad[1] : layer.stride[1],
+    ] = inp[:, :, indices, :]
+    del inp
+    inp = (
+        inp_strided  # so the rest is just processing for Conv2D with transposed weights
+    )
+
+    # TODO reduce code duplication with Conv2D
+    unfold = nn.Unfold(
+        layer.kernel_size,
+        dilation=layer.dilation,
+        padding=(
+            0,
+            0,
+        ),  # equivalent Conv2D has (0, 0) padding for input_strided as input
+        stride=(1, 1),  # equivalent Conv2D has (1, 1) stride for input_strided as input
+    )
+
+    if layer.groups != 1:
+        inp = inp.reshape(
+            inp.size(0) * layer.groups,
+            inp.size(1) // layer.groups,
+            inp.shape[2],
+            inp.shape[3],
+        )  # inp.shape == (batch*groups, in_channels / groups, H, W) to meet Groupwise-wise Convolution, so that each group is colvolved with its own filter
+
+    inp = unfold(inp).permute([1, 0, 2]).flatten(1)
+    return inp
+
+
 class GPTQ:
     def __init__(self, layer):
         self.layer = layer
         self.dev = self.layer.weight.device
         W = layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
             W = W.flatten(1)
 
-        if isinstance(self.layer, nn.Conv1d):
-            W = W.t()
         self.rows = W.shape[0]
         self.columns = W.shape[1]
         self.H: Optional[torch.Tensor] = torch.zeros(
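The helpers above rely on the textbook identity that, for groups == 1, stride 1, dilation 1 and no padding, a ConvTranspose2d equals a Conv2d with channel-swapped, spatially flipped weights applied to a fully padded input. A standalone sanity check of that identity (illustrative only, not part of the diff):

import torch
import torch.nn as nn
import torch.nn.functional as F

layer = nn.ConvTranspose2d(3, 5, kernel_size=3, bias=False)
x = torch.randn(1, 3, 8, 8)

# what convtranspose2d_weights_to_conv2d_weights computes for groups == 1
w_conv = layer.weight.transpose(1, 0).flip((-2, -1))

y_ref = layer(x)                                   # ConvTranspose2d output, shape (1, 5, 10, 10)
y_conv = F.conv2d(F.pad(x, (2, 2, 2, 2)), w_conv)  # "full" padding of kernel_size - 1
assert torch.allclose(y_ref, y_conv, atol=1e-5)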
@@ -53,7 +184,7 @@ class GPTQ:
         if len(inp.shape) == 2:
             inp = inp.unsqueeze(0)
         tmp = inp.shape[0]
-        if isinstance(self.layer, nn.Linear)
+        if isinstance(self.layer, nn.Linear):
             if len(inp.shape) > 2:
                 inp = inp.reshape((-1, inp.shape[-1]))
             inp = inp.t()
@@ -65,10 +196,59 @@ class GPTQ:
                 stride=self.layer.stride,
             )
 
+            if self.layer.groups != 1:
+                # the idea behind conversion of depthwise convolution to matmul is described here
+                # https://discuss.pytorch.org/t/conv1d-implementation-using-torch-nn-functional-unfold/109643/2
+                # although depthwise convolution is equal to a set of MatMuls
+                # (please note `w.view(1, groups, out_channels // groups, -1)` in the reference above is not just w.flatten(1))
+                # we can approximate groupwise Hessians with their mean
+                # so that we will have just a single Hessian and the usual GPTQ applies
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                    inp.shape[3],
+                )  # inp.shape == (batch*groups, in_channels / groups, H, W) to meet Groupwise-wise Convolution, so that each group is colvolved with its own filter
+
+            inp = unfold(
+                inp
+            )  # inp.shape == (batch*groups, k_h*k_w*in_channels / groups, flattened_patches)
+            inp = inp.permute(
+                [1, 0, 2]
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups, flattened_patches)
+            inp = inp.flatten(
+                1
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups * flattened_patches)
+            # so inp.matmul(inp.t()).shape == (k_x*k_y*in_channels / groups, k_x*k_y*in_channels / groups) == W.flatten(1)
+
+        if isinstance(self.layer, nn.Conv1d):
+            # nn.Conv1d is basically the same as nn.Conv2d so we can use the same idea as for nn.Conv2d
+            # TODO reduce code duplication
+            # represent conv1d as conv2d(1, k) on reshaped_input(batch, in_channels, 1, L)
+            unfold = nn.Unfold(
+                (1, self.layer.kernel_size[0]),
+                dilation=(1, self.layer.dilation[0]),
+                padding=(0, self.layer.padding[0]),
+                stride=(1, self.layer.stride[0]),
+            )
+            if self.layer.groups != 1:
+                # please see Conv2D for additional info
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                )  # inp.shape == (batch*groups, in_channels / groups, L) to meet Groupwise-wise Convolution, so that each group is colvolved with its own filter
+
+            inp = inp.unsqueeze(
+                -2
+            )  # (batch*groups, in_channels / groups, L)->(batch*groups, in_channels / groups, 1, L), valid for Conv2D
             inp = unfold(inp)
             inp = inp.permute([1, 0, 2])
             inp = inp.flatten(1)
 
+        if isinstance(self.layer, nn.ConvTranspose2d):
+            inp = get_matmul_input_for_convtranspose2d(self.layer, inp)
+
         self.H *= self.nsamples / (self.nsamples + tmp)
         self.nsamples += tmp
         inp = math.sqrt(2 / self.nsamples) * inp.float()
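The Hessian statistics gathered above rest on the usual im2col identity: a Conv2d forward is a matmul between the flattened weight and the unfolded input patches, so the unfolded input has the same inner dimension as W.flatten(1). A small sketch of that identity for the groups == 1 case (illustrative, not from the diff):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
x = torch.randn(2, 3, 16, 16)

unfold = nn.Unfold(conv.kernel_size, dilation=conv.dilation, padding=conv.padding, stride=conv.stride)
cols = unfold(x)                     # (batch, 3*3*3, H*W) flattened patches
out = conv.weight.flatten(1) @ cols  # (batch, out_channels, H*W)
assert torch.allclose(out.view(2, 8, 16, 16), conv(x), atol=1e-5)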
@@ -84,10 +264,13 @@ class GPTQ:
         verbose=False,
     ):
         W = self.layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
-            W = W.flatten(1)
-
-        W = W
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
+            conv2d_shape = W.shape
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+
         W = W.float()
         tick = time.time()
         if not self.quantizer.ready():
@@ -181,9 +364,46 @@ class GPTQ:
         if actorder:
             Q = Q[:, invperm]
 
-        self.layer.
-
-
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    self.layer.weight.flatten(1)[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    convtranspose2d_weights_to_conv2d_weights(
+                        self.layer, self.layer.weight.data
+                    ).flatten(1)[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
+        else:
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    self.layer.weight[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
+
+        assert (
+            groupsize == -1 or torch.sum(dead) == 0
+        )  # TODO `dead` elements should be RTN quantized for groupwise
+
+        if isinstance(self.layer, nn.ConvTranspose2d):
+            Q_conv2d = Q.reshape(conv2d_shape).to(self.layer.weight.data.dtype)
+            self.layer.weight.data = conv2d_weights_to_convtranspose2d_weights(
+                self.layer, Q_conv2d
+            )
+        else:
+            self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+                self.layer.weight.data.dtype
+            )
 
     def free(self):
         self.H = None
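For context, the `quantize()` call above is defined elsewhere in the package and is not part of this diff; the reference GPTQ formulation it follows is the round-to-nearest helper sketched below (an assumption about its exact body):

import torch

def quantize(x, scale, zero, maxq):
    # round onto the integer grid [0, maxq], then dequantize
    q = torch.clamp(torch.round(x / scale) + zero, 0, maxq)
    return scale * (q - zero)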
tico/quantization/algorithm/gptq/quantizer.py
CHANGED
@@ -170,6 +170,7 @@ class GPTQQuantizer(BaseQuantizer):
 
         gptq_conf = self.config
         assert isinstance(gptq_conf, GPTQConfig)
+        gptq_conf.validate()
         # Disable use_cache during calibration
         if hasattr(model, "config") and hasattr(model.config, "use_cache"):
             orig_use_cache = model.config.use_cache
@@ -193,7 +194,15 @@ class GPTQQuantizer(BaseQuantizer):
             )
         ):
             # 1) Identify quantizable submodules within the layer
-            full = find_layers(
+            full = find_layers(
+                layer,
+                layers=[
+                    torch.nn.Linear,
+                    torch.nn.Conv2d,
+                    torch.nn.Conv1d,
+                    torch.nn.ConvTranspose2d,
+                ],
+            )
             sequential = [list(full.keys())]
 
             # 2) Set up GPTQ objects and gather stats
@@ -204,7 +213,10 @@ class GPTQQuantizer(BaseQuantizer):
             for name in subset:
                 gptq[name] = GPTQ(subset[name])
                 gptq[name].quantizer.configure(
-                    bits=
+                    bits=gptq_conf.weight_bits,
+                    perchannel=gptq_conf.perchannel,
+                    sym=gptq_conf.symmetric,
+                    mse=gptq_conf.mse,
                 )
 
             # Hook to collect (inp, out) for GPTQ
@@ -244,10 +256,10 @@ class GPTQQuantizer(BaseQuantizer):
                 if gptq_conf.verbose:
                     print(f"[Layer {l_idx}] {name} -> Quantizing ...")
                 gptq[name].fasterquant(
-                    percdamp=
-                    groupsize
-                    actorder=
-                    static_groups=
+                    percdamp=gptq_conf.percdamp,
+                    groupsize=gptq_conf.groupsize,
+                    actorder=gptq_conf.actorder,
+                    static_groups=gptq_conf.static_groups,
                     verbose=gptq_conf.verbose,
                 )
                 quantizers[f"model.layers.{l_idx}.{name}"] = gptq[name].quantizer
tico/quantization/config/{pt2e.py → fpi_gptq.py}
RENAMED
@@ -12,14 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from
+from dataclasses import dataclass
 
+from tico.quantization.config.gptq import GPTQConfig
 
-
+
+@dataclass
+class FPIGPTQConfig(GPTQConfig):
     """
-    Configuration for
+    Configuration for FPIGPTQ (Fixed Point Iteration).
     """
 
+    def __init__(self, verbose: bool = False, show_progress: bool = True):
+        self.verbose = verbose
+        self.show_progress = show_progress
+
     @property
     def name(self) -> str:
-        return "
+        return "fpi_gptq"
tico/quantization/config/gptq.py
CHANGED
@@ -12,18 +12,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from dataclasses import dataclass
+
 from tico.quantization.config.base import BaseConfig
 
 
+@dataclass
 class GPTQConfig(BaseConfig):
     """
-    Configuration for GPTQ.
+    Configuration for GPTQ weight quantization.
     """
 
-
-
-
+    # general
+    verbose: bool = False
+    show_progress: bool = True
+
+    # quantizer.configure params (weight quantization spec)
+    weight_bits: int = 8
+    perchannel: bool = True
+    symmetric: bool = False
+    mse: bool = False
+
+    # GPTQ.fasterquant params (algorithm hyperparams)
+    percdamp: float = 0.01
+    groupsize: int = -1
+    actorder: bool = True
+    static_groups: bool = False
 
     @property
     def name(self) -> str:
         return "gptq"
+
+    def validate(self) -> None:
+        if self.weight_bits <= 0:
+            raise ValueError(f"weight_bits must be positive. got {self.weight_bits}")
+        if self.groupsize != -1 and self.groupsize <= 0:
+            raise ValueError(f"groupsize must be -1 or positive. got {self.groupsize}")
+        if not (0.0 < self.percdamp <= 1.0):
+            raise ValueError(f"percdamp must be in (0, 1]. got {self.percdamp}")
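A short usage sketch of the new dataclass fields and the validate() hook; the field values here are illustrative:

from tico.quantization.config.gptq import GPTQConfig

cfg = GPTQConfig(weight_bits=4, groupsize=128, symmetric=True)
cfg.validate()   # raises ValueError for non-positive bits, an invalid groupsize, or percdamp outside (0, 1]
print(cfg.name)  # "gptq"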
tico/quantization/public_interface.py
CHANGED
@@ -18,7 +18,6 @@ from typing import Any, Dict, Optional
 import torch
 
 from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
-from tico.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
 from tico.quantization.config.base import BaseConfig
 from tico.quantization.quantizer import BaseQuantizer
 from tico.quantization.quantizer_registry import get_quantizer
@@ -55,11 +54,6 @@ def prepare(
         raise RuntimeError("prepare() already has been called.")
     quantizer = get_quantizer(quant_config)
 
-    if isinstance(quantizer, PT2EQuantizer) and inplace:
-        raise RuntimeError(
-            "In-place is not supported for PT2E quantization due to limitation in the underlying Torch APIs. Please set 'inplace=False' to proceed."
-        )
-
     model = model if inplace else copy.deepcopy(model)
 
     model = quantizer.prepare(model, args, kwargs)
@@ -90,10 +84,6 @@ def convert(model, inplace: Optional[bool] = True):
     else:
         raise RuntimeError("Call prepare() function first.")
 
-    if isinstance(quantizer, PT2EQuantizer) and inplace:
-        raise RuntimeError(
-            "In-place is not supported for PT2E quantization due to limitation in the underlying Torch APIs. Please set 'inplace=False' to proceed."
-        )
     # deepcopy prevents the quantizer from restoring the catcher used for calibration.
     # TODO Revisit `inplace` policy.
     if isinstance(quantizer, GPTQQuantizer) and not inplace:
tico/quantization/wrapq/quantizer.py
CHANGED
@@ -115,6 +115,7 @@ class PTQQuantizer(BaseQuantizer):
                 assert not self.strict_wrap
                 wrapped = self._wrap_supported(wrapped, child_cfg)
                 root[i] = wrapped  # type: ignore[index]
+            return root
 
         if isinstance(root, nn.ModuleDict):
             for k, child in list(root.items()):
@@ -128,6 +129,7 @@ class PTQQuantizer(BaseQuantizer):
                 assert not self.strict_wrap
                 wrapped = self._wrap_supported(wrapped, child_cfg)
                 root[k] = wrapped  # type: ignore[index]
+            return root
 
         # Case C: Leaf node
         root_name = getattr(root, "_get_name", lambda: None)()
tico/quantization/wrapq/wrappers/quant_elementwise.py
CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import
+from typing import Any, Optional
 
 import torch
 import torch.nn as nn
@@ -31,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
     """
 
     # subclass must set this
-    FUNC:
+    FUNC: Any = None
 
     def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
@@ -68,7 +68,7 @@ class QuantElementwise(QuantModuleBase):
 
 
 """
-Why `FUNC` is a `staticmethod`
+Q1) Why `FUNC` is a `staticmethod`
 
 - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
   so the callable keeps the expected signature `Tensor -> Tensor`
@@ -85,27 +85,67 @@ Why `FUNC` is a `staticmethod`
   than an `nn.Module` instance that would appear in the module tree.
 
 - Small perf/alloc win: no bound-method objects are created on each call.
+
+Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
+
+- torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
+  Their type is `builtin_function_or_method`, not a Python `FunctionType`.
+  This causes `torch.export` (and FX tracing) to fail with:
+  "expected FunctionType, found builtin_function_or_method".
+
+- By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
+  we convert it into a normal Python function object (`FunctionType`),
+  which satisfies export/tracing requirements.
+
+- Functionally, this adds zero overhead and preserves semantics,
+  but makes the callable introspectable (has __code__, __name__, etc.)
+  and compatible with TorchDynamo / FX graph capture.
+
+- It also keeps FUNC pure and stateless, ensuring the elementwise op
+  is represented as `call_function(_tanh)` in the traced graph
+  rather than a bound `call_method` or module attribute access.
 """
 
-
+
+def _relu(x: torch.Tensor) -> torch.Tensor:
+    return torch.relu(x)
+
+
+def _tanh(x: torch.Tensor) -> torch.Tensor:
+    return torch.tanh(x)
+
+
+def _sigmoid(x: torch.Tensor) -> torch.Tensor:
+    return torch.sigmoid(x)
+
+
+def _gelu(x: torch.Tensor) -> torch.Tensor:
+    return torch.nn.functional.gelu(x)
+
+
 @register(nn.Sigmoid)
 class QuantSigmoid(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _sigmoid(x)
 
 
-# Tanh
 @register(nn.Tanh)
 class QuantTanh(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _tanh(x)
 
 
-# ReLU
 @register(nn.ReLU)
 class QuantReLU(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _relu(x)
 
 
-# GELU (approximate)
 @register(nn.GELU)
 class QuantGELU(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _gelu(x)
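The Q2 rationale above can be verified directly; this standalone snippet (not part of the diff) shows why the thin wrappers are plain FunctionType objects while the torch builtins are not:

import types
import torch

def _tanh(x: torch.Tensor) -> torch.Tensor:
    return torch.tanh(x)

print(isinstance(torch.tanh, types.FunctionType))  # False: builtin_function_or_method
print(isinstance(_tanh, types.FunctionType))       # True: acceptable to torch.export / FX tracing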
tico/serialize/operators/adapters/onert/llama_attention.py
ADDED
@@ -0,0 +1,51 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, TYPE_CHECKING
+
+import torch
+
+from transformers.cache_utils import DynamicCache
+from transformers.models.llama.modeling_llama import LlamaAttention
+
+
+def llama_attention_forward_adapter(
+    self: LlamaAttention,
+    hidden_states: torch.Tensor,
+    position_embeddings: List[torch.Tensor],
+    attention_mask: torch.Tensor,
+    past_key_value: DynamicCache,
+    cache_position: torch.Tensor,
+    **kwargs,
+):
+    # past_key_value is a dict with key_cache and value_cache.
+    # It needs to be decomposed for tico and circle which does not know dict.
+    key_cache = past_key_value.key_cache  # type: ignore[union-attr]
+    value_cache = past_key_value.value_cache  # type: ignore[union-attr]
+    return (
+        torch.ops.circle_custom.attention(
+            hidden_states,
+            self.q_proj.weight,
+            self.k_proj.weight,
+            self.v_proj.weight,
+            self.o_proj.weight,
+            position_embeddings[0],  # cos
+            position_embeddings[1],  # sin
+            attention_mask,
+            key_cache[self.layer_idx],
+            value_cache[self.layer_idx],  # Same to value_cache
+            cache_position,
+        ),
+        None,
+    )
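The diff does not show the call site for this adapter. One plausible wiring, stated here as an assumption rather than the package's documented flow, is to monkey-patch LlamaAttention.forward before export so each attention block lowers to the circle_custom.attention op registered in tico/utils/register_custom_op.py:

from transformers.models.llama.modeling_llama import LlamaAttention

from tico.serialize.operators.adapters.onert.llama_attention import (
    llama_attention_forward_adapter,
)

# Assumption: swap in the adapter prior to torch.export / tico conversion.
LlamaAttention.forward = llama_attention_forward_adapter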
tico/serialize/operators/op_attention.py
ADDED
@@ -0,0 +1,58 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, List, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import torch._ops
+    import torch.fx
+import torch
+from circle_schema import circle
+
+from tico.serialize.circle_graph import CircleSubgraph
+from tico.serialize.operators.hashable_opcode import OpCode
+from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
+from tico.serialize.operators.utils import create_builtin_operator, get_op_index
+from tico.utils.validate_args_kwargs import CircleAttentionArgs
+
+
+@register_node_visitor
+class AttentionVisitor(NodeVisitor):
+    target: List[torch._ops.OpOverload] = [
+        torch.ops.circle_custom.attention.default,
+    ]
+
+    def __init__(self, op_codes: Dict[OpCode, int], graph: CircleSubgraph):
+        super().__init__(op_codes, graph)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+    ) -> circle.Operator.OperatorT:
+        args = CircleAttentionArgs(*node.args, **node.kwargs)  # type: ignore[arg-type]
+        op_index = get_op_index(
+            circle.BuiltinOperator.BuiltinOperator.ATTENTION, self._op_codes
+        )
+
+        inputs = node.args
+        outputs = [node]
+        operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
+
+        # Op-specific option
+        operator.builtinOptionsType = (
+            circle.BuiltinOptions.BuiltinOptions.AttentionOptions
+        )
+        operator.builtinOptions = circle.AttentionOptions.AttentionOptionsT()
+
+        return operator