tico 0.1.0.dev251123__py3-none-any.whl → 0.2.0.dev260122__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tico/__init__.py +2 -2
- tico/_version.py +1 -0
- tico/passes/convert_conv3d_to_conv2d.py +435 -0
- tico/passes/convert_sym_size_to_circle_shape.py +99 -0
- tico/passes/decompose_batch_norm.py +9 -5
- tico/passes/lower_copy.py +95 -0
- tico/passes/ops.py +4 -0
- tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +87 -12
- tico/quantization/algorithm/fpi_gptq/quantizer.py +9 -8
- tico/quantization/algorithm/gptq/gptq.py +211 -12
- tico/quantization/algorithm/gptq/quantizer.py +18 -12
- tico/quantization/config/fpi_gptq.py +3 -0
- tico/quantization/config/gptq.py +27 -4
- tico/quantization/public_interface.py +0 -10
- tico/quantization/wrapq/quantizer.py +2 -0
- tico/serialize/operators/adapters/onert/llama_attention.py +51 -0
- tico/serialize/operators/op_attention.py +58 -0
- tico/serialize/operators/op_circle_shape.py +64 -0
- tico/serialize/operators/op_dequantize_per_channel.py +1 -0
- tico/serialize/operators/op_dequantize_per_tensor.py +1 -0
- tico/serialize/operators/op_transpose_conv.py +66 -50
- tico/utils/convert.py +16 -1
- tico/utils/padding.py +13 -5
- tico/utils/record_input.py +2 -2
- tico/utils/register_custom_op.py +63 -0
- tico/utils/validate_args_kwargs.py +49 -4
- tico-0.2.0.dev260122.dist-info/METADATA +631 -0
- {tico-0.1.0.dev251123.dist-info → tico-0.2.0.dev260122.dist-info}/RECORD +33 -48
- {tico-0.1.0.dev251123.dist-info → tico-0.2.0.dev260122.dist-info}/WHEEL +1 -1
- {tico-0.1.0.dev251123.dist-info → tico-0.2.0.dev260122.dist-info}/entry_points.txt +0 -1
- tico/quantization/algorithm/pt2e/annotation/__init__.py +0 -1
- tico/quantization/algorithm/pt2e/annotation/annotator.py +0 -208
- tico/quantization/algorithm/pt2e/annotation/config.py +0 -26
- tico/quantization/algorithm/pt2e/annotation/op/__init__.py +0 -21
- tico/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +0 -63
- tico/quantization/algorithm/pt2e/annotation/op/add.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/conv2d.py +0 -90
- tico/quantization/algorithm/pt2e/annotation/op/div.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/linear.py +0 -92
- tico/quantization/algorithm/pt2e/annotation/op/mean.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/mul.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/op/relu6.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/rsqrt.py +0 -51
- tico/quantization/algorithm/pt2e/annotation/op/sub.py +0 -55
- tico/quantization/algorithm/pt2e/annotation/spec.py +0 -45
- tico/quantization/algorithm/pt2e/annotation/utils.py +0 -88
- tico/quantization/algorithm/pt2e/quantizer.py +0 -81
- tico/quantization/algorithm/pt2e/transformation/__init__.py +0 -1
- tico/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -58
- tico/quantization/algorithm/pt2e/utils.py +0 -135
- tico/quantization/config/pt2e.py +0 -25
- tico/serialize/operators/op_copy.py +0 -187
- tico-0.1.0.dev251123.dist-info/METADATA +0 -392
- /tico/{quantization/algorithm/pt2e → serialize/operators/adapters/onert}/__init__.py +0 -0
- {tico-0.1.0.dev251123.dist-info → tico-0.2.0.dev260122.dist-info/licenses}/LICENSE +0 -0
- {tico-0.1.0.dev251123.dist-info → tico-0.2.0.dev260122.dist-info}/top_level.txt +0 -0
tico/passes/ops.py
CHANGED
@@ -38,6 +38,10 @@ class AtenOps:
             torch.ops.aten.conv1d.default,
             torch.ops.aten.conv1d.padding,
         ]
+        self.conv3d = [
+            torch.ops.aten.conv3d.default,
+            torch.ops.aten.conv3d.padding,
+        ]
         self.detach = [
             torch.ops.aten.detach_.default,
             torch.ops.aten.detach.default,
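The new `self.conv3d` group mirrors the existing `self.conv1d` list and backs the new `convert_conv3d_to_conv2d` pass. A minimal sketch of how a pass might key off these targets in an exported FX graph; the `is_conv3d_call` helper below is hypothetical and not part of tico:

```python
import torch

# aten overloads registered in AtenOps.conv3d above
CONV3D_TARGETS = {
    torch.ops.aten.conv3d.default,
    torch.ops.aten.conv3d.padding,
}


def is_conv3d_call(node) -> bool:
    """Hypothetical helper: True if a torch.fx.Node calls one of the conv3d overloads."""
    return node.op == "call_function" and node.target in CONV3D_TARGETS
```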
tico/quantization/algorithm/fpi_gptq/fpi_gptq.py
CHANGED

@@ -25,6 +25,12 @@ from typing import Optional
 import torch
 import torch.nn as nn

+from tico.quantization.algorithm.gptq.gptq import (
+    conv2d_weights_to_convtranspose2d_weights,
+    convtranspose2d_weights_to_conv2d_weights,
+    get_matmul_input_for_convtranspose2d,
+)
+
 from tico.quantization.algorithm.gptq.quant import quantize, Quantizer


@@ -56,11 +62,12 @@ class FPI_GPTQ:
         self.layer = layer
         self.dev = self.layer.weight.device
         W = layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            W = W.flatten(1)
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
             W = W.flatten(1)

-        if isinstance(self.layer, nn.Conv1d):
-            W = W.t()
         self.rows = W.shape[0]
         self.columns = W.shape[1]
         self.H: Optional[torch.Tensor] = torch.zeros(
@@ -73,7 +80,7 @@
         if len(inp.shape) == 2:
             inp = inp.unsqueeze(0)
         tmp = inp.shape[0]
-        if isinstance(self.layer, nn.Linear)
+        if isinstance(self.layer, nn.Linear):
             if len(inp.shape) > 2:
                 inp = inp.reshape((-1, inp.shape[-1]))
             inp = inp.t()
@@ -85,9 +92,57 @@
                 stride=self.layer.stride,
             )

+            if self.layer.groups != 1:
+                # the idea behind conversion of depthwise convolution to matmul is described here
+                # https://discuss.pytorch.org/t/conv1d-implementation-using-torch-nn-functional-unfold/109643/2
+                # although depthwise convolution is equal to a set of MatMuls
+                # (please note `w.view(1, groups, out_channels // groups, -1)` in the reference above is not just w.flatten(1))
+                # we can approximate groupwise Hessians with their mean
+                # so that we will have just a single Hessian and the usual GPTQ applies
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                    inp.shape[3],
+                )  # inp.shape == (batch*groups, in_channels / groups, H, W) to meet group-wise convolution, so that each group is convolved with its own filter
+
+            inp = unfold(
+                inp
+            )  # inp.shape == (batch*groups, k_h*k_w*in_channels / groups, flattened_patches)
+            inp = inp.permute(
+                [1, 0, 2]
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups, flattened_patches)
+            inp = inp.flatten(
+                1
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups * flattened_patches)
+            # so inp.matmul(inp.t()).shape == (k_x*k_y*in_channels / groups, k_x*k_y*in_channels / groups) == W.flatten(1)
+
+        if isinstance(self.layer, nn.Conv1d):
+            # nn.Conv1d is basically the same as nn.Conv2d so we can use the same idea as for nn.Conv2d
+            # TODO reduce code duplication
+            # represent conv1d as conv2d(1, k) on reshaped_input(batch, in_channels, 1, L)
+            unfold = nn.Unfold(
+                (1, self.layer.kernel_size[0]),
+                dilation=(1, self.layer.dilation[0]),
+                padding=(0, self.layer.padding[0]),
+                stride=(1, self.layer.stride[0]),
+            )
+            if self.layer.groups != 1:
+                # please see Conv2D for additional info
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                )  # inp.shape == (batch*groups, in_channels / groups, L) to meet group-wise convolution, so that each group is convolved with its own filter
+
+            inp = inp.unsqueeze(
+                -2
+            )  # (batch*groups, in_channels / groups, L) -> (batch*groups, in_channels / groups, 1, L), valid for Conv2D
             inp = unfold(inp)
             inp = inp.permute([1, 0, 2])
             inp = inp.flatten(1)
+        if isinstance(self.layer, nn.ConvTranspose2d):
+            inp = get_matmul_input_for_convtranspose2d(self.layer, inp)

         self.H *= self.nsamples / (self.nsamples + tmp)
         self.nsamples += tmp
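Both tricks used in the hunk above (grouped convolution handled through a shared unfold, and Conv1d treated as a Conv2d with a (1, k) kernel) can be checked numerically with plain PyTorch. A small self-contained sketch; the layer sizes are arbitrary and chosen only for illustration:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# 1) grouped Conv2d == a set of per-group matmuls on unfolded patches
B, C_in, C_out, H, W, groups = 2, 6, 6, 8, 8, 3
conv = nn.Conv2d(C_in, C_out, kernel_size=3, padding=1, groups=groups, bias=False)
x = torch.randn(B, C_in, H, W)

unfold = nn.Unfold(conv.kernel_size, dilation=conv.dilation,
                   padding=conv.padding, stride=conv.stride)
cols = unfold(x.reshape(B * groups, C_in // groups, H, W))  # (B*groups, C_in/groups*k_h*k_w, L)
w = conv.weight.flatten(1)                                  # (C_out, C_in/groups*k_h*k_w)
out_pg = C_out // groups
out = torch.empty(B, C_out, H * W)
for b in range(B):
    for g in range(groups):
        out[b, g * out_pg:(g + 1) * out_pg] = (
            w[g * out_pg:(g + 1) * out_pg] @ cols[b * groups + g]
        )
assert torch.allclose(out.view(B, C_out, H, W), conv(x), atol=1e-4)

# 2) Conv1d == Conv2d with a (1, k) kernel applied to a (batch, C, 1, L) input
conv1d = nn.Conv1d(3, 5, kernel_size=4, stride=2, padding=1, bias=False)
xl = torch.randn(2, 3, 16)
out1d = F.conv2d(
    xl.unsqueeze(-2),
    conv1d.weight.unsqueeze(-2),
    stride=(1, conv1d.stride[0]),
    padding=(0, conv1d.padding[0]),
).squeeze(-2)
assert torch.allclose(out1d, conv1d(xl), atol=1e-4)
```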
@@ -100,10 +155,13 @@
         verbose=False,
     ):
         W = self.layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
             W = W.flatten(1)
-
-        W = W
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
+            conv2d_shape = W.shape
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+
         W = W.float()
         tick = time.time()
         if not self.quantizer.ready():
@@ -139,7 +197,9 @@
             self.quantizer.maxq,
             W,
             Hinv=Hinv,
-            max_num_of_iters=
+            max_num_of_iters=min(
+                50, self.columns
+            ),  # we don't need to iterate more than self.columns
         )

         if torch.cuda.is_available():
@@ -151,13 +211,22 @@

         Q = Q[:, invperm]

-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
             Q[:, dead] = quantize(
                 self.layer.weight.flatten(1)[:, dead],
                 self.quantizer.scale,
                 self.quantizer.zero,
                 self.quantizer.maxq,
             )
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            Q[:, dead] = quantize(
+                convtranspose2d_weights_to_conv2d_weights(
+                    self.layer, self.layer.weight.data
+                ).flatten(1)[:, dead],
+                self.quantizer.scale,
+                self.quantizer.zero,
+                self.quantizer.maxq,
+            )
         else:
             Q[:, dead] = quantize(
                 self.layer.weight[:, dead],
@@ -166,9 +235,15 @@
                 self.quantizer.maxq,
             )

-        self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
-            self.layer.weight.data.dtype
-        )
+        if isinstance(self.layer, nn.ConvTranspose2d):
+            Q_conv2d = Q.reshape(conv2d_shape).to(self.layer.weight.data.dtype)
+            self.layer.weight.data = conv2d_weights_to_convtranspose2d_weights(
+                self.layer, Q_conv2d
+            )
+        else:
+            self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+                self.layer.weight.data.dtype
+            )

     def free(self):
         self.H = None
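The write-back above relies on the two weight-layout helpers being exact inverses of each other. A minimal round-trip sketch, assuming the helpers are importable exactly as in the import hunk above (layer sizes arbitrary):

```python
import torch
import torch.nn as nn

from tico.quantization.algorithm.gptq.gptq import (
    conv2d_weights_to_convtranspose2d_weights,
    convtranspose2d_weights_to_conv2d_weights,
)

layer = nn.ConvTranspose2d(4, 6, kernel_size=3, stride=2, groups=2, bias=False)
w = layer.weight.data.clone()          # ConvTranspose2d layout: (in, out // groups, k_h, k_w)
w_conv2d = convtranspose2d_weights_to_conv2d_weights(layer, w)
assert w_conv2d.shape == (6, 2, 3, 3)  # Conv2d layout: (out, in // groups, k_h, k_w)
w_back = conv2d_weights_to_convtranspose2d_weights(layer, w_conv2d)
assert torch.allclose(w, w_back)       # lossless round trip, so Q can be written back to the layer
```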
tico/quantization/algorithm/fpi_gptq/quantizer.py
CHANGED

@@ -76,14 +76,15 @@ class FPIGPTQQuantizer(GPTQQuantizer):
             )
         ):
             # 1) Identify quantizable submodules within the layer
-            full = find_layers(
-
-
-
-
-
-
-
+            full = find_layers(
+                layer,
+                layers=[
+                    torch.nn.Linear,
+                    torch.nn.Conv2d,
+                    torch.nn.Conv1d,
+                    torch.nn.ConvTranspose2d,
+                ],
+            )
             sequential = [list(full.keys())]

             # 2) Set up (as in GPTQ)
tico/quantization/algorithm/gptq/gptq.py
CHANGED

@@ -31,16 +31,147 @@ torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cudnn.allow_tf32 = False


+def convtranspose2d_weights_to_conv2d_weights(layer, w) -> torch.Tensor:
+    if layer.groups == 1:
+        # the last two dimensions of w is (k_h, k_w) to get equivalent Conv2D we need to flip them to get `w_conv2D_equivalent_to_w[i, j] = w_conv[k_h - i - 1, k_w - j - 1]`
+        # the first two dimensions of w is (input_channels, output_channels), so we need to transpose them as Conv2D weights should be in the (output_channels, input_channels) form
+        # please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L1059-L1061 for additional info
+        w_conv_transposed = w.transpose(1, 0).flip((-2, -1))
+    else:
+        # basically it's the same as for `layer.groups == 1` but groupwise
+        in_channels, out_channels, kernel_h, kernel_w = layer.weight.shape
+        out_channels *= layer.groups
+        w_conv_transposed = torch.zeros(
+            out_channels, in_channels // layer.groups, kernel_h, kernel_w
+        )
+        for i in range(0, layer.groups):
+            w_conv_transposed[
+                i
+                * out_channels
+                // layer.groups : (i + 1)
+                * out_channels
+                // layer.groups,
+                :,
+                :,
+                :,
+            ] = (
+                w[
+                    i
+                    * in_channels
+                    // layer.groups : (i + 1)
+                    * in_channels
+                    // layer.groups,
+                    :,
+                    :,
+                    :,
+                ]
+                .transpose(1, 0)
+                .flip((-2, -1))
+            )

+    return w_conv_transposed
+
+
+def conv2d_weights_to_convtranspose2d_weights(orig_layer, w) -> torch.Tensor:
+    # this is just an inverse of convtranspose2d_weights_to_conv2d_weights
+    if orig_layer.groups > 1:
+        in_channels, out_channels, _, _ = orig_layer.weight.shape
+        out_channels *= orig_layer.groups
+        w_conv_transposed = torch.zeros_like(orig_layer.weight)
+        for i in range(0, orig_layer.groups):
+            w_conv_transposed[
+                i
+                * in_channels
+                // orig_layer.groups : (i + 1)
+                * in_channels
+                // orig_layer.groups,
+                :,
+                :,
+                :,
+            ] = (
+                w[
+                    i
+                    * out_channels
+                    // orig_layer.groups : (i + 1)
+                    * out_channels
+                    // orig_layer.groups,
+                    :,
+                    :,
+                    :,
+                ]
+                .transpose(1, 0)
+                .flip((-2, -1))
+            )
+    else:
+        w_conv_transposed = w.transpose(1, 0).flip((-2, -1))
+
+    return w_conv_transposed
+
+
+def get_matmul_input_for_convtranspose2d(layer, inp):
+    # Please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L996-L998 for padding
+    strided_pad = (
+        layer.dilation[0] * (layer.kernel_size[0] - 1) - layer.padding[0],
+        layer.dilation[1] * (layer.kernel_size[1] - 1) - layer.padding[1],
+    )
+
+    # interleave input with zero rows and columns according to stride
+    # Please see https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L991-L994 for more info
+    inp_strided = torch.zeros(
+        inp.shape[0],
+        inp.shape[1],
+        layer.stride[0] * (inp.shape[2] - 1) + 2 * strided_pad[0] + 1,
+        layer.stride[1] * (inp.shape[3] - 1) + 2 * strided_pad[1] + 1,
+        device=inp.device,
+    )
+
+    indices = torch.arange(0, inp.shape[2], device=inp.device)
+    # insert original input values according to stride to meet https://github.com/pytorch/pytorch/blob/d38164a545b4a4e4e0cf73ce67173f70574890b6/torch/nn/modules/conv.py#L991-L994
+    inp_strided[
+        :,
+        :,
+        layer.stride[0] * indices + strided_pad[0],
+        strided_pad[1] : -strided_pad[1] : layer.stride[1],
+    ] = inp[:, :, indices, :]
+    del inp
+    inp = (
+        inp_strided  # so the rest is just processing for Conv2D with transposed weights
+    )
+
+    # TODO reduce code duplication with Conv2D
+    unfold = nn.Unfold(
+        layer.kernel_size,
+        dilation=layer.dilation,
+        padding=(
+            0,
+            0,
+        ),  # equivalent Conv2D has (0, 0) padding for input_strided as input
+        stride=(1, 1),  # equivalent Conv2D has (1, 1) stride for input_strided as input
+    )
+
+    if layer.groups != 1:
+        inp = inp.reshape(
+            inp.size(0) * layer.groups,
+            inp.size(1) // layer.groups,
+            inp.shape[2],
+            inp.shape[3],
+        )  # inp.shape == (batch*groups, in_channels / groups, H, W) to meet group-wise convolution, so that each group is convolved with its own filter
+
+    inp = unfold(inp).permute([1, 0, 2]).flatten(1)
+    return inp
+
+
 class GPTQ:
     def __init__(self, layer):
         self.layer = layer
         self.dev = self.layer.weight.device
         W = layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
             W = W.flatten(1)

-        if isinstance(self.layer, nn.Conv1d):
-            W = W.t()
         self.rows = W.shape[0]
         self.columns = W.shape[1]
         self.H: Optional[torch.Tensor] = torch.zeros(
@@ -53,7 +184,7 @@
         if len(inp.shape) == 2:
             inp = inp.unsqueeze(0)
         tmp = inp.shape[0]
-        if isinstance(self.layer, nn.Linear)
+        if isinstance(self.layer, nn.Linear):
             if len(inp.shape) > 2:
                 inp = inp.reshape((-1, inp.shape[-1]))
             inp = inp.t()
@@ -65,10 +196,59 @@
                 stride=self.layer.stride,
             )

+            if self.layer.groups != 1:
+                # the idea behind conversion of depthwise convolution to matmul is described here
+                # https://discuss.pytorch.org/t/conv1d-implementation-using-torch-nn-functional-unfold/109643/2
+                # although depthwise convolution is equal to a set of MatMuls
+                # (please note `w.view(1, groups, out_channels // groups, -1)` in the reference above is not just w.flatten(1))
+                # we can approximate groupwise Hessians with their mean
+                # so that we will have just a single Hessian and the usual GPTQ applies
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                    inp.shape[3],
+                )  # inp.shape == (batch*groups, in_channels / groups, H, W) to meet group-wise convolution, so that each group is convolved with its own filter
+
+            inp = unfold(
+                inp
+            )  # inp.shape == (batch*groups, k_h*k_w*in_channels / groups, flattened_patches)
+            inp = inp.permute(
+                [1, 0, 2]
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups, flattened_patches)
+            inp = inp.flatten(
+                1
+            )  # inp.shape == (k_h*k_w*in_channels / groups, batch * groups * flattened_patches)
+            # so inp.matmul(inp.t()).shape == (k_x*k_y*in_channels / groups, k_x*k_y*in_channels / groups) == W.flatten(1)
+
+        if isinstance(self.layer, nn.Conv1d):
+            # nn.Conv1d is basically the same as nn.Conv2d so we can use the same idea as for nn.Conv2d
+            # TODO reduce code duplication
+            # represent conv1d as conv2d(1, k) on reshaped_input(batch, in_channels, 1, L)
+            unfold = nn.Unfold(
+                (1, self.layer.kernel_size[0]),
+                dilation=(1, self.layer.dilation[0]),
+                padding=(0, self.layer.padding[0]),
+                stride=(1, self.layer.stride[0]),
+            )
+            if self.layer.groups != 1:
+                # please see Conv2D for additional info
+                inp = inp.reshape(
+                    inp.size(0) * self.layer.groups,
+                    inp.size(1) // self.layer.groups,
+                    inp.shape[2],
+                )  # inp.shape == (batch*groups, in_channels / groups, L) to meet group-wise convolution, so that each group is convolved with its own filter
+
+            inp = inp.unsqueeze(
+                -2
+            )  # (batch*groups, in_channels / groups, L) -> (batch*groups, in_channels / groups, 1, L), valid for Conv2D
             inp = unfold(inp)
             inp = inp.permute([1, 0, 2])
             inp = inp.flatten(1)

+        if isinstance(self.layer, nn.ConvTranspose2d):
+            inp = get_matmul_input_for_convtranspose2d(self.layer, inp)
+
         self.H *= self.nsamples / (self.nsamples + tmp)
         self.nsamples += tmp
         inp = math.sqrt(2 / self.nsamples) * inp.float()
@@ -84,10 +264,13 @@
         verbose=False,
     ):
         W = self.layer.weight.data.clone()
-        if isinstance(self.layer, nn.Conv2d):
-            W = W.flatten(1)
-
-        W = W
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            W = convtranspose2d_weights_to_conv2d_weights(self.layer, W)
+            conv2d_shape = W.shape
+            W = W.flatten(1)  # reshaped to matrix (OUT_channels x the_rest)
+
         W = W.float()
         tick = time.time()
         if not self.quantizer.ready():
@@ -181,7 +364,7 @@
         if actorder:
             Q = Q[:, invperm]

-        if isinstance(self.layer, nn.Conv2d):
+        if isinstance(self.layer, nn.Conv2d) or isinstance(self.layer, nn.Conv1d):
             if groupsize == -1:  # TODO support groupsize != -1
                 Q[:, dead] = quantize(
                     self.layer.weight.flatten(1)[:, dead],
@@ -189,6 +372,16 @@
                     self.quantizer.zero,
                     self.quantizer.maxq,
                 )
+        elif isinstance(self.layer, nn.ConvTranspose2d):
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    convtranspose2d_weights_to_conv2d_weights(
+                        self.layer, self.layer.weight.data
+                    ).flatten(1)[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
         else:
             if groupsize == -1:  # TODO support groupsize != -1
                 Q[:, dead] = quantize(
@@ -202,9 +395,15 @@
             groupsize == -1 or torch.sum(dead) == 0
         )  # TODO `dead` elements should be RTN quantized for groupwise

-        self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
-            self.layer.weight.data.dtype
-        )
+        if isinstance(self.layer, nn.ConvTranspose2d):
+            Q_conv2d = Q.reshape(conv2d_shape).to(self.layer.weight.data.dtype)
+            self.layer.weight.data = conv2d_weights_to_convtranspose2d_weights(
+                self.layer, Q_conv2d
+            )
+        else:
+            self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+                self.layer.weight.data.dtype
+            )

     def free(self):
         self.H = None
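The three new helpers rest on a standard identity: a transposed convolution equals an ordinary convolution with channel-transposed, spatially flipped weights applied to a zero-interleaved, re-padded input. A small sketch that checks both the stride-1 and the strided case with plain PyTorch (shapes arbitrary, dilation fixed at 1):

```python
import torch
import torch.nn.functional as F

# stride 1: ConvTranspose2d == Conv2d with transposed, flipped weights on an
# input padded by (k - 1) on every side
x = torch.randn(1, 3, 5, 5)
w = torch.randn(3, 4, 3, 3)                # ConvTranspose2d layout: (in, out, k_h, k_w)
w_conv = w.transpose(0, 1).flip((-2, -1))  # Conv2d layout: (out, in, k_h, k_w)
ref = F.conv_transpose2d(x, w)
out = F.conv2d(F.pad(x, (2, 2, 2, 2)), w_conv)
assert torch.allclose(ref, out, atol=1e-4)

# stride > 1: zero-interleave the input by the stride first, which is exactly
# what get_matmul_input_for_convtranspose2d prepares before unfolding
stride, pad, k = 2, 1, 3
x = torch.randn(1, 2, 4, 4)
w = torch.randn(2, 3, k, k)
ref = F.conv_transpose2d(x, w, stride=stride, padding=pad)

sp = k - 1 - pad                           # dilation == 1 here
xs = torch.zeros(1, 2,
                 stride * (x.shape[2] - 1) + 2 * sp + 1,
                 stride * (x.shape[3] - 1) + 2 * sp + 1)
xs[:, :, sp:sp + stride * (x.shape[2] - 1) + 1:stride,
         sp:sp + stride * (x.shape[3] - 1) + 1:stride] = x
out = F.conv2d(xs, w.transpose(0, 1).flip((-2, -1)))
assert torch.allclose(ref, out, atol=1e-4)
```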
tico/quantization/algorithm/gptq/quantizer.py
CHANGED

@@ -170,6 +170,7 @@ class GPTQQuantizer(BaseQuantizer):

         gptq_conf = self.config
         assert isinstance(gptq_conf, GPTQConfig)
+        gptq_conf.validate()
         # Disable use_cache during calibration
         if hasattr(model, "config") and hasattr(model.config, "use_cache"):
             orig_use_cache = model.config.use_cache
@@ -193,13 +194,15 @@
             )
         ):
             # 1) Identify quantizable submodules within the layer
-            full = find_layers(
-
-
-
-
-
-
+            full = find_layers(
+                layer,
+                layers=[
+                    torch.nn.Linear,
+                    torch.nn.Conv2d,
+                    torch.nn.Conv1d,
+                    torch.nn.ConvTranspose2d,
+                ],
+            )
             sequential = [list(full.keys())]

             # 2) Set up GPTQ objects and gather stats
@@ -210,7 +213,10 @@
             for name in subset:
                 gptq[name] = GPTQ(subset[name])
                 gptq[name].quantizer.configure(
-                    bits=
+                    bits=gptq_conf.weight_bits,
+                    perchannel=gptq_conf.perchannel,
+                    sym=gptq_conf.symmetric,
+                    mse=gptq_conf.mse,
                 )

             # Hook to collect (inp, out) for GPTQ
@@ -250,10 +256,10 @@
                 if gptq_conf.verbose:
                     print(f"[Layer {l_idx}] {name} -> Quantizing ...")
                 gptq[name].fasterquant(
-                    percdamp=
-                    groupsize
-                    actorder=
-                    static_groups=
+                    percdamp=gptq_conf.percdamp,
+                    groupsize=gptq_conf.groupsize,
+                    actorder=gptq_conf.actorder,
+                    static_groups=gptq_conf.static_groups,
                     verbose=gptq_conf.verbose,
                 )
                 quantizers[f"model.layers.{l_idx}.{name}"] = gptq[name].quantizer
tico/quantization/config/fpi_gptq.py
CHANGED

@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from dataclasses import dataclass
+
 from tico.quantization.config.gptq import GPTQConfig


+@dataclass
 class FPIGPTQConfig(GPTQConfig):
     """
     Configuration for FPIGPTQ (Fixed Point Iteration).
tico/quantization/config/gptq.py
CHANGED

@@ -12,18 +12,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from dataclasses import dataclass
+
 from tico.quantization.config.base import BaseConfig


+@dataclass
 class GPTQConfig(BaseConfig):
     """
-    Configuration for GPTQ.
+    Configuration for GPTQ weight quantization.
     """

-
-
-
+    # general
+    verbose: bool = False
+    show_progress: bool = True
+
+    # quantizer.configure params (weight quantization spec)
+    weight_bits: int = 8
+    perchannel: bool = True
+    symmetric: bool = False
+    mse: bool = False
+
+    # GPTQ.fasterquant params (algorithm hyperparams)
+    percdamp: float = 0.01
+    groupsize: int = -1
+    actorder: bool = True
+    static_groups: bool = False

     @property
     def name(self) -> str:
         return "gptq"
+
+    def validate(self) -> None:
+        if self.weight_bits <= 0:
+            raise ValueError(f"weight_bits must be positive. got {self.weight_bits}")
+        if self.groupsize != -1 and self.groupsize <= 0:
+            raise ValueError(f"groupsize must be -1 or positive. got {self.groupsize}")
+        if not (0.0 < self.percdamp <= 1.0):
+            raise ValueError(f"percdamp must be in (0, 1]. got {self.percdamp}")
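With GPTQConfig now a dataclass, the GPTQ hyperparameters can be set at construction time and sanity-checked before calibration; a minimal usage sketch:

```python
from tico.quantization.config.gptq import GPTQConfig

cfg = GPTQConfig(weight_bits=4, symmetric=True, groupsize=-1, percdamp=0.01)
cfg.validate()   # raises ValueError for, e.g., weight_bits=0 or percdamp=0.0
print(cfg.name)  # "gptq"
```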

tico/quantization/public_interface.py
CHANGED

@@ -18,7 +18,6 @@ from typing import Any, Dict, Optional
 import torch

 from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
-from tico.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
 from tico.quantization.config.base import BaseConfig
 from tico.quantization.quantizer import BaseQuantizer
 from tico.quantization.quantizer_registry import get_quantizer
@@ -55,11 +54,6 @@
         raise RuntimeError("prepare() already has been called.")
     quantizer = get_quantizer(quant_config)

-    if isinstance(quantizer, PT2EQuantizer) and inplace:
-        raise RuntimeError(
-            "In-place is not supported for PT2E quantization due to limitation in the underlying Torch APIs. Please set 'inplace=False' to proceed."
-        )
-
     model = model if inplace else copy.deepcopy(model)

     model = quantizer.prepare(model, args, kwargs)
@@ -90,10 +84,6 @@ def convert(model, inplace: Optional[bool] = True):
     else:
         raise RuntimeError("Call prepare() function first.")

-    if isinstance(quantizer, PT2EQuantizer) and inplace:
-        raise RuntimeError(
-            "In-place is not supported for PT2E quantization due to limitation in the underlying Torch APIs. Please set 'inplace=False' to proceed."
-        )
     # deepcopy prevents the quantizer from restoring the catcher used for calibration.
     # TODO Revisit `inplace` policy.
     if isinstance(quantizer, GPTQQuantizer) and not inplace:

tico/quantization/wrapq/quantizer.py
CHANGED

@@ -115,6 +115,7 @@ class PTQQuantizer(BaseQuantizer):
                 assert not self.strict_wrap
                 wrapped = self._wrap_supported(wrapped, child_cfg)
                 root[i] = wrapped  # type: ignore[index]
+            return root

         if isinstance(root, nn.ModuleDict):
             for k, child in list(root.items()):
@@ -128,6 +129,7 @@
                 assert not self.strict_wrap
                 wrapped = self._wrap_supported(wrapped, child_cfg)
                 root[k] = wrapped  # type: ignore[index]
+            return root

         # Case C: Leaf node
         root_name = getattr(root, "_get_name", lambda: None)()