tico 0.1.0.dev251102__py3-none-any.whl → 0.1.0.dev251123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tico/__init__.py +1 -1
- tico/quantization/algorithm/fpi_gptq/__init__.py +1 -0
- tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +176 -0
- tico/quantization/algorithm/fpi_gptq/quantizer.py +179 -0
- tico/quantization/algorithm/gptq/gptq.py +44 -2
- tico/quantization/algorithm/gptq/quantizer.py +7 -1
- tico/quantization/config/fpi_gptq.py +29 -0
- tico/quantization/wrapq/wrappers/nn/quant_silu.py +5 -4
- tico/quantization/wrapq/wrappers/quant_elementwise.py +51 -11
- tico/quantization/wrapq/wrappers/registry.py +5 -2
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/METADATA +1 -1
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/RECORD +16 -12
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/LICENSE +0 -0
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/WHEEL +0 -0
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/entry_points.txt +0 -0
- {tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/top_level.txt +0 -0
tico/__init__.py CHANGED

tico/quantization/algorithm/fpi_gptq/__init__.py ADDED
@@ -0,0 +1 @@
+# DO NOT REMOVE THIS FILE

tico/quantization/algorithm/fpi_gptq/fpi_gptq.py ADDED
@@ -0,0 +1,176 @@
+# Copyright IST-DASLab. 2025. (commit: 2d65066). GitHub repository.
+# Retrieved from https://github.com/IST-DASLab/gptq. Licensed under the
+# Apache License 2.0.
+
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# https://github.com/IST-DASLab/gptq/blob/2d65066/gptq.py
+
+import math
+import time
+from typing import Optional
+
+import torch
+import torch.nn as nn
+
+from tico.quantization.algorithm.gptq.quant import quantize, Quantizer
+
+
+def iterate_GPTQ(scale, zero, maxq, W, Hinv, max_num_of_iters=50):
+
+    cur_weights = W.clone()
+    mults = torch.pow(torch.diag(Hinv), -1)
+    Hinv_U = torch.triu(Hinv, diagonal=1)
+
+    init_weights = W.clone()
+    for _ in range(max_num_of_iters):
+        cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+        d_W = torch.mul((cur_weights - cur_Q), mults)
+        cur_weights = init_weights - torch.matmul(d_W, Hinv_U)
+        del d_W, cur_Q
+        d_W = cur_Q = None
+
+    del init_weights
+    init_weights = None
+
+    cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+    return cur_Q, cur_weights
+
+
+class FPI_GPTQ:
+    def __init__(self, layer):
+        self.layer = layer
+        self.dev = self.layer.weight.device
+        W = layer.weight.data.clone()
+        if isinstance(self.layer, nn.Conv2d):
+            W = W.flatten(1)
+
+        if isinstance(self.layer, nn.Conv1d):
+            W = W.t()
+        self.rows = W.shape[0]
+        self.columns = W.shape[1]
+        self.H: Optional[torch.Tensor] = torch.zeros(
+            (self.columns, self.columns), device=self.dev
+        )
+        self.nsamples = 0
+        self.quantizer: Quantizer = Quantizer()
+
+    def add_batch(self, inp, out):
+        if len(inp.shape) == 2:
+            inp = inp.unsqueeze(0)
+        tmp = inp.shape[0]
+        if isinstance(self.layer, nn.Linear) or isinstance(self.layer, nn.Conv1d):
+            if len(inp.shape) > 2:
+                inp = inp.reshape((-1, inp.shape[-1]))
+            inp = inp.t()
+        if isinstance(self.layer, nn.Conv2d):
+            unfold = nn.Unfold(
+                self.layer.kernel_size,
+                dilation=self.layer.dilation,
+                padding=self.layer.padding,
+                stride=self.layer.stride,
+            )
+
+            inp = unfold(inp)
+            inp = inp.permute([1, 0, 2])
+            inp = inp.flatten(1)
+
+        self.H *= self.nsamples / (self.nsamples + tmp)
+        self.nsamples += tmp
+        inp = math.sqrt(2 / self.nsamples) * inp.float()
+        self.H += inp.matmul(inp.t())
+
+    def fasterquant(
+        self,
+        percdamp=0.01,
+        verbose=False,
+    ):
+        W = self.layer.weight.data.clone()
+        if isinstance(self.layer, nn.Conv2d):
+            W = W.flatten(1)
+        if isinstance(self.layer, nn.Conv1d):
+            W = W.t()
+        W = W.float()
+        tick = time.time()
+        if not self.quantizer.ready():
+            self.quantizer.find_params(W, weight=True)
+
+        H = self.H
+        del self.H
+        assert isinstance(H, torch.Tensor)
+        dead = torch.diag(H) == 0
+        H[dead, dead] = 1
+        W[:, dead] = 0
+
+        # actorder
+        perm = torch.argsort(torch.diag(H), descending=True)
+        W = W[:, perm]
+        H = H[perm][:, perm]
+        invperm = torch.argsort(perm)
+
+        Q = torch.zeros_like(W)
+
+        damp = percdamp * torch.mean(torch.diag(H))
+        diag = torch.arange(self.columns, device=self.dev)
+        H[diag, diag] += damp
+        H = torch.linalg.cholesky(H)
+        assert isinstance(H, torch.Tensor)
+        H = torch.cholesky_inverse(H)
+        H = torch.linalg.cholesky(H, upper=True)
+        Hinv = H
+
+        Q, W = iterate_GPTQ(
+            self.quantizer.scale,
+            self.quantizer.zero,
+            self.quantizer.maxq,
+            W,
+            Hinv=Hinv,
+            max_num_of_iters=50,
+        )
+
+        if torch.cuda.is_available():
+            torch.cuda.synchronize()
+        if verbose:
+            print("time %.2f" % (time.time() - tick))
+            Losses = 0.5 * ((Q - W) / torch.diag(Hinv)) ** 2
+            print("error", torch.sum(Losses).item())
+
+        Q = Q[:, invperm]
+
+        if isinstance(self.layer, nn.Conv2d):
+            Q[:, dead] = quantize(
+                self.layer.weight.flatten(1)[:, dead],
+                self.quantizer.scale,
+                self.quantizer.zero,
+                self.quantizer.maxq,
+            )
+        else:
+            Q[:, dead] = quantize(
+                self.layer.weight[:, dead],
+                self.quantizer.scale,
+                self.quantizer.zero,
+                self.quantizer.maxq,
+            )
+
+        self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+            self.layer.weight.data.dtype
+        )
+
+    def free(self):
+        self.H = None
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
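
A minimal usage sketch of the class above on a single nn.Linear. This is not taken from the package: the layer shape, the random calibration batches, and the hook wiring are illustrative, though every call (configure, add_batch, fasterquant, free) and its arguments appear verbatim in this diff. It assumes the dev251123 wheel is installed.

# Minimal sketch (not package code): quantize one nn.Linear with FPI_GPTQ.
import torch
import torch.nn as nn

from tico.quantization.algorithm.fpi_gptq.fpi_gptq import FPI_GPTQ

layer = nn.Linear(64, 32)
engine = FPI_GPTQ(layer)
# Same quantizer settings the new FPIGPTQQuantizer uses (8-bit, per-channel, asymmetric).
engine.quantizer.configure(bits=8, perchannel=True, sym=False, mse=False)

# Accumulate Hessian statistics the same way the quantizer does: a forward hook
# that passes (input, output) to add_batch().
handle = layer.register_forward_hook(
    lambda _m, inp, out: engine.add_batch(inp[0].data, out.data)
)
with torch.no_grad():
    for _ in range(8):  # a few fake calibration batches
        layer(torch.randn(4, 16, 64))
handle.remove()

engine.fasterquant(percdamp=0.01, verbose=True)  # rewrites layer.weight in place
engine.free()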

tico/quantization/algorithm/fpi_gptq/quantizer.py ADDED
@@ -0,0 +1,179 @@
+# Copyright (c) 2024 Intel Corporation
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict
+
+import torch
+from tqdm.auto import tqdm
+
+from tico.quantization.algorithm.fpi_gptq.fpi_gptq import FPI_GPTQ
+from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
+from tico.quantization.algorithm.gptq.utils import (
+    find_layers,
+    gather_single_batch_from_dict,
+    gather_single_batch_from_list,
+)
+from tico.quantization.config.fpi_gptq import FPIGPTQConfig
+from tico.quantization.quantizer_registry import register_quantizer
+
+
+@register_quantizer(FPIGPTQConfig)
+class FPIGPTQQuantizer(GPTQQuantizer):
+    """
+    Quantizer for applying the Fixed Point Iteration GPTQ algorithm (FPIGPTQ)
+    This implementation expects the same steps as GPTQQuantizer.
+    It should produce results very close to reference GPTQ but much faster when running on cuda.
+    """
+
+    def __init__(self, config: FPIGPTQConfig):
+        super().__init__(config)
+
+    @torch.no_grad()
+    def convert(self, model):
+
+        # Restore original forwards (we no longer want to stop after first layer)
+        assert self._orig_model_forward is not None
+        model.forward = self._orig_model_forward
+        assert (
+            self._first_layer_ref is not None and self._orig_layer_forward is not None
+        )
+        self._first_layer_ref.forward = self._orig_layer_forward
+
+        gptq_conf = self.config
+        assert isinstance(gptq_conf, FPIGPTQConfig)
+        # Disable use_cache during calibration
+        if hasattr(model, "config") and hasattr(model.config, "use_cache"):
+            orig_use_cache = model.config.use_cache
+            model.config.use_cache = False
+        else:
+            orig_use_cache = None
+
+        # Identify layers
+        if hasattr(model, "model"):
+            target_layers = model.model.layers
+        else:
+            target_layers = [model]
+
+        quantizers: Dict[str, Any] = {}
+        for l_idx, layer in enumerate(
+            tqdm(
+                target_layers,
+                desc="Quantizing layers",
+                unit="layer",
+                disable=not gptq_conf.show_progress,
+            )
+        ):
+            # 1) Identify quantizable submodules within the layer
+            full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+            # filter out depthwise convolutions and alike
+            full = {
+                key: full[key]
+                for key in full.keys()
+                if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+            }
+
+            sequential = [list(full.keys())]
+
+            # 2) Set up (as in GPTQ)
+            for names in sequential:
+                subset = {n: full[n] for n in names}
+
+                gptq: Dict[str, FPI_GPTQ] = {}
+                for name in subset:
+                    gptq[name] = FPI_GPTQ(subset[name])
+                    gptq[name].quantizer.configure(
+                        bits=8, perchannel=True, sym=False, mse=False
+                    )
+
+                # Hook to collect (inp, out) for GPTQ
+                def add_batch(name):
+                    def _hook(_, inp, out):
+                        gptq[name].add_batch(inp[0].data, out.data)
+
+                    return _hook
+
+                handles = []
+                for name in subset:
+                    handles.append(subset[name].register_forward_hook(add_batch(name)))
+
+                # Run layer forward over all cached batches to build Hessian/statistics
+                batch_num = self.num_batches
+                for batch_idx in tqdm(
+                    range(batch_num),
+                    desc=f"[L{l_idx}] collecting",
+                    leave=False,
+                    unit="batch",
+                    disable=not gptq_conf.show_progress,
+                ):
+                    cache_args_batch = gather_single_batch_from_list(
+                        self.cache_args, batch_idx
+                    )
+                    cache_kwargs_batch = gather_single_batch_from_dict(
+                        self.cache_kwargs, batch_idx
+                    )
+                    layer(*cache_args_batch, **cache_kwargs_batch)
+
+                # Remove handles
+                for h in handles:
+                    h.remove()
+
+                # 3) Quantize each submodule
+                for name in subset:
+                    if gptq_conf.verbose:
+                        print(f"[Layer {l_idx}] {name} -> Quantizing ...")
+                    gptq[name].fasterquant(
+                        percdamp=0.01,
+                        verbose=gptq_conf.verbose,
+                    )
+                    quantizers[f"model.layers.{l_idx}.{name}"] = gptq[name].quantizer
+                    gptq[name].free()
+
+            # 4) After quantization, re-run the layer to produce outputs for the next layer
+            for batch_idx in tqdm(
+                range(batch_num),
+                desc=f"[L{l_idx}] re-forward",
+                leave=False,
+                unit="batch",
+                disable=not gptq_conf.show_progress,
+            ):
+                cache_args_batch = gather_single_batch_from_list(
+                    self.cache_args, batch_idx
+                )
+                cache_kwargs_batch = gather_single_batch_from_dict(
+                    self.cache_kwargs, batch_idx
+                )
+                outs = layer(*cache_args_batch, **cache_kwargs_batch)
+                # LLaMA's decoder layer return type differs across Transformers versions:
+                # some return a tuple (hidden_states, ...), others return just a tensor.
+                # This line ensures we always take the first element when it's a tuple.
+                outs = outs[0] if isinstance(outs, tuple) else outs
+                # Update inputs for next iteration.
+                self.cache_args[0][batch_idx] = outs
+
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+        # Restore the original cache configuration.
+        if orig_use_cache is not None:
+            model.config.use_cache = orig_use_cache
+
+        # Clear caches to free memory
+        self.cache_args.clear()
+        self.cache_kwargs.clear()
+        self.num_batches = 0
+
+        model.quantizers = quantizers
+
+        return model
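
Read directly from iterate_GPTQ in fpi_gptq.py above (a restatement of the loop body, not a derivation): let M be the Hinv argument (the upper Cholesky factor of the damped inverse Hessian, as computed in fasterquant), U = triu(M, 1) its strictly upper-triangular part, and d = diag(M). Each sweep recomputes every column's correction against the original weights, instead of updating columns one at a time as reference GPTQ does, which is why the per-layer work becomes a handful of dense matmuls that map well to CUDA:

% Fixed-point sweep performed by iterate_GPTQ (T = max_num_of_iters = 50)
W^{(0)} = W, \qquad
W^{(t+1)} = W^{(0)} - \bigl[(W^{(t)} - \operatorname{quant}(W^{(t)}))\,\operatorname{diag}(d)^{-1}\bigr]\,U, \qquad
Q = \operatorname{quant}\bigl(W^{(T)}\bigr)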

tico/quantization/algorithm/gptq/gptq.py CHANGED
@@ -36,6 +36,11 @@ class GPTQ:
         self.layer = layer
         self.dev = self.layer.weight.device
         W = layer.weight.data.clone()
+        if isinstance(self.layer, nn.Conv2d):
+            W = W.flatten(1)
+
+        if isinstance(self.layer, nn.Conv1d):
+            W = W.t()
         self.rows = W.shape[0]
         self.columns = W.shape[1]
         self.H: Optional[torch.Tensor] = torch.zeros(
@@ -48,10 +53,22 @@
         if len(inp.shape) == 2:
             inp = inp.unsqueeze(0)
         tmp = inp.shape[0]
-        if isinstance(self.layer, nn.Linear):
-            if len(inp.shape)
+        if isinstance(self.layer, nn.Linear) or isinstance(self.layer, nn.Conv1d):
+            if len(inp.shape) > 2:
                 inp = inp.reshape((-1, inp.shape[-1]))
             inp = inp.t()
+        if isinstance(self.layer, nn.Conv2d):
+            unfold = nn.Unfold(
+                self.layer.kernel_size,
+                dilation=self.layer.dilation,
+                padding=self.layer.padding,
+                stride=self.layer.stride,
+            )
+
+            inp = unfold(inp)
+            inp = inp.permute([1, 0, 2])
+            inp = inp.flatten(1)
+
         self.H *= self.nsamples / (self.nsamples + tmp)
         self.nsamples += tmp
         inp = math.sqrt(2 / self.nsamples) * inp.float()
@@ -67,6 +84,10 @@
         verbose=False,
     ):
         W = self.layer.weight.data.clone()
+        if isinstance(self.layer, nn.Conv2d):
+            W = W.flatten(1)
+        if isinstance(self.layer, nn.Conv1d):
+            W = W.t()
         W = W.float()
         tick = time.time()
         if not self.quantizer.ready():
@@ -160,6 +181,27 @@
         if actorder:
            Q = Q[:, invperm]
 
+        if isinstance(self.layer, nn.Conv2d):
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    self.layer.weight.flatten(1)[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
+        else:
+            if groupsize == -1:  # TODO support groupsize != -1
+                Q[:, dead] = quantize(
+                    self.layer.weight[:, dead],
+                    self.quantizer.scale,
+                    self.quantizer.zero,
+                    self.quantizer.maxq,
+                )
+
+        assert (
+            groupsize == -1 or torch.sum(dead) == 0
+        )  # TODO `dead` elements should be RTN quantized for groupwise
+
         self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
             self.layer.weight.data.dtype
         )
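
The Conv2d handling added to add_batch and fasterquant above rests on the im2col identity: nn.Unfold with the layer's own kernel_size/dilation/padding/stride produces columns whose row layout matches weight.flatten(1), so the convolution becomes a plain matmul and the GPTQ Hessian can be accumulated over the unfolded input. A quick check of that identity (illustrative shapes, not package code):

# Quick check of the im2col identity the Conv2d support relies on.
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
x = torch.randn(2, 3, 16, 16)

unfold = nn.Unfold(
    conv.kernel_size, dilation=conv.dilation, padding=conv.padding, stride=conv.stride
)
cols = unfold(x)              # (N, C*kh*kw, L): the layout add_batch() feeds into H
W2d = conv.weight.flatten(1)  # (out_channels, C*kh*kw): the layout fasterquant() quantizes
y_matmul = W2d @ cols         # (N, out_channels, L)
y_conv = conv(x).flatten(2)   # (N, out_channels, H*W)

print(torch.allclose(y_conv, y_matmul, atol=1e-5))  # True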

tico/quantization/algorithm/gptq/quantizer.py CHANGED
@@ -193,7 +193,13 @@ class GPTQQuantizer(BaseQuantizer):
             )
         ):
             # 1) Identify quantizable submodules within the layer
-            full = find_layers(layer)
+            full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+            # filter out depthwise convolutions and alike
+            full = {
+                key: full[key]
+                for key in full.keys()
+                if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+            }
             sequential = [list(full.keys())]
 
             # 2) Set up GPTQ objects and gather stats
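
The new filter in step 1 keeps only Conv2d modules with groups == 1, because a grouped or depthwise convolution's flattened weight no longer lines up with the full unfolded input that add_batch builds the Hessian from. A small illustration of the same comprehension on a hand-built dict (module names are made up, not package code):

# Illustration of the depthwise-conv filter applied to a hand-built dict.
import torch.nn as nn

full = {
    "proj": nn.Linear(16, 16),
    "conv": nn.Conv2d(8, 16, 3),             # groups == 1 -> kept
    "dwconv": nn.Conv2d(8, 8, 3, groups=8),  # depthwise   -> filtered out
}

full = {
    key: full[key]
    for key in full.keys()
    if not isinstance(full[key], nn.Conv2d) or full[key].groups == 1
}

print(sorted(full.keys()))  # ['conv', 'proj']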

tico/quantization/config/fpi_gptq.py ADDED
@@ -0,0 +1,29 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from tico.quantization.config.gptq import GPTQConfig
+
+
+class FPIGPTQConfig(GPTQConfig):
+    """
+    Configuration for FPIGPTQ (Fixed Point Iteration).
+    """
+
+    def __init__(self, verbose: bool = False, show_progress: bool = True):
+        self.verbose = verbose
+        self.show_progress = show_progress
+
+    @property
+    def name(self) -> str:
+        return "fpi_gptq"
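
FPIGPTQConfig only adds a registry name on top of the two flags it stores; the @register_quantizer(FPIGPTQConfig) decorator in quantizer.py above is what maps this config type to FPIGPTQQuantizer. A minimal sketch of constructing it (how the config is then fed into tico's public quantization entry points is not shown in this diff):

# Minimal sketch: constructing the new config (assumes the wheel is installed).
from tico.quantization.config.fpi_gptq import FPIGPTQConfig

cfg = FPIGPTQConfig(verbose=True, show_progress=False)
print(cfg.name)           # "fpi_gptq"
print(cfg.verbose)        # True
print(cfg.show_progress)  # False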

tico/quantization/wrapq/wrappers/nn/quant_silu.py CHANGED
@@ -19,14 +19,15 @@ import torch.nn as nn
 
 from tico.quantization.config.ptq import PTQConfig
 from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
-from tico.quantization.wrapq.wrappers.registry import
+from tico.quantization.wrapq.wrappers.registry import try_register
 
 
-@
+@try_register("torch.nn.SiLU", "transformers.activations.SiLUActivation")
 class QuantSiLU(QuantModuleBase):
     """
-    QuantSiLU — drop-in
-
+    QuantSiLU — drop-in quantized implementation of the SiLU operation.
+
+    This module quantizes both intermediate tensors:
       • s = sigmoid(x)   (logistic)
       • y = x * s        (mul)
     """
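
The docstring describes SiLU as two quantized intermediates, s = sigmoid(x) and y = x * s, which is just the definition of SiLU. A quick check that the decomposition matches nn.SiLU numerically (illustrative, not package code):

# Quick check that the decomposition in the docstring matches nn.SiLU.
import torch
import torch.nn as nn

x = torch.randn(4, 8)
s = torch.sigmoid(x)   # first quantized intermediate in QuantSiLU
y = x * s              # second quantized intermediate
print(torch.allclose(y, nn.SiLU()(x), atol=1e-6))  # True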

tico/quantization/wrapq/wrappers/quant_elementwise.py CHANGED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import
+from typing import Any, Optional
 
 import torch
 import torch.nn as nn
@@ -31,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
     """
 
     # subclass must set this
-    FUNC:
+    FUNC: Any = None
 
     def __init_subclass__(cls, **kwargs):
         super().__init_subclass__(**kwargs)
@@ -68,7 +68,7 @@ class QuantElementwise(QuantModuleBase):
 
 
 """
-Why `FUNC` is a `staticmethod`
+Q1) Why `FUNC` is a `staticmethod`
 
 - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
   so the callable keeps the expected signature `Tensor -> Tensor`
@@ -85,27 +85,67 @@ Why `FUNC` is a `staticmethod`
   than an `nn.Module` instance that would appear in the module tree.
 
 - Small perf/alloc win: no bound-method objects are created on each call.
+
+Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
+
+- torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
+  Their type is `builtin_function_or_method`, not a Python `FunctionType`.
+  This causes `torch.export` (and FX tracing) to fail with:
+  "expected FunctionType, found builtin_function_or_method".
+
+- By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
+  we convert it into a normal Python function object (`FunctionType`),
+  which satisfies export/tracing requirements.
+
+- Functionally, this adds zero overhead and preserves semantics,
+  but makes the callable introspectable (has __code__, __name__, etc.)
+  and compatible with TorchDynamo / FX graph capture.
+
+- It also keeps FUNC pure and stateless, ensuring the elementwise op
+  is represented as `call_function(_tanh)` in the traced graph
+  rather than a bound `call_method` or module attribute access.
 """
 
-
+
+def _relu(x: torch.Tensor) -> torch.Tensor:
+    return torch.relu(x)
+
+
+def _tanh(x: torch.Tensor) -> torch.Tensor:
+    return torch.tanh(x)
+
+
+def _sigmoid(x: torch.Tensor) -> torch.Tensor:
+    return torch.sigmoid(x)
+
+
+def _gelu(x: torch.Tensor) -> torch.Tensor:
+    return torch.nn.functional.gelu(x)
+
+
 @register(nn.Sigmoid)
 class QuantSigmoid(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _sigmoid(x)
 
 
-# Tanh
 @register(nn.Tanh)
 class QuantTanh(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _tanh(x)
 
 
-# ReLU
 @register(nn.ReLU)
 class QuantReLU(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _relu(x)
 
 
-# GELU (approximate)
 @register(nn.GELU)
 class QuantGELU(QuantElementwise):
-
+    @staticmethod
+    def FUNC(x: torch.Tensor) -> torch.Tensor:
+        return _gelu(x)
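
The Q2 note above hinges on one property: torch's built-in ops are not Python FunctionType objects, while a one-line wrapper is. A quick check of that property and of the wrapper's numerical transparency (illustrative, not package code):

# torch built-ins are C functions, not Python FunctionType; a thin wrapper is.
import types

import torch


def _tanh(x: torch.Tensor) -> torch.Tensor:
    return torch.tanh(x)


print(type(torch.tanh).__name__)                   # 'builtin_function_or_method' (as the comment notes)
print(isinstance(torch.tanh, types.FunctionType))  # False
print(isinstance(_tanh, types.FunctionType))       # True

x = torch.randn(3)
print(torch.allclose(_tanh(x), torch.tanh(x)))     # True: the wrapper changes nothing numerically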

tico/quantization/wrapq/wrappers/registry.py CHANGED
@@ -23,14 +23,17 @@ _WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
 _IMPORT_ONCE = False
 _CORE_MODULES = (
     "tico.quantization.wrapq.wrappers.quant_elementwise",
+    ## nn ##
     "tico.quantization.wrapq.wrappers.nn.quant_layernorm",
     "tico.quantization.wrapq.wrappers.nn.quant_linear",
+    # This includes not only `nn.SiLU` but also `SiLUActivation` from transformers
+    # as they are same operation.
     "tico.quantization.wrapq.wrappers.nn.quant_silu",
-
+    ## llama ##
     "tico.quantization.wrapq.wrappers.llama.quant_attn",
     "tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
     "tico.quantization.wrapq.wrappers.llama.quant_mlp",
-
+    ## fairseq ##
     "tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
     "tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
     "tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
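
try_register, imported by quant_silu.py above, takes dotted class paths as strings, which is what lets the SiLU wrapper also cover transformers.activations.SiLUActivation without making transformers a hard dependency. The sketch below shows one way such a helper could work; it is an assumption about the mechanism, not the actual implementation in registry.py:

# Hypothetical sketch of a string-based try_register: resolve each dotted path
# with importlib and skip targets whose package is not installed. NOT the code
# in tico/quantization/wrapq/wrappers/registry.py.
import importlib
from typing import Dict

_WRAPPERS: Dict[type, type] = {}


def try_register(*dotted_paths: str):
    def deco(wrapper_cls: type) -> type:
        for path in dotted_paths:
            module_name, _, class_name = path.rpartition(".")
            try:
                target = getattr(importlib.import_module(module_name), class_name)
            except (ImportError, AttributeError):
                continue  # e.g. transformers not installed: silently skip this target
            _WRAPPERS[target] = wrapper_cls
        return wrapper_cls

    return deco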

{tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-tico/__init__.py,sha256=
+tico/__init__.py,sha256=8a76TiclPFZrJ7BLy5dHGxBCtia-SHR-5HWcVqDiSE8,1883
 tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
 tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
 tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -51,10 +51,13 @@ tico/quantization/public_interface.py,sha256=YlE4re0HkkEDcq8IeXhPJUtveLIiDjAlChL
 tico/quantization/quantizer.py,sha256=FYNiqUqoH9vz1bda0I6yuKqJi2KdIfLEBd4EgeC-_t4,2357
 tico/quantization/quantizer_registry.py,sha256=MxVE1_hj1p8FjdAqkLzUhdez3Cqc-V25k6XKOcTkei0,2414
 tico/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+tico/quantization/algorithm/fpi_gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+tico/quantization/algorithm/fpi_gptq/fpi_gptq.py,sha256=fzXA2JXoSYClfGxEUoOEUFwFcqAiqg-RqwrFYLOaBjU,5388
+tico/quantization/algorithm/fpi_gptq/quantizer.py,sha256=O04V01CyA7eU_pV08R8KGTiOhThbKf955KwwzagN-S8,6873
 tico/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
-tico/quantization/algorithm/gptq/gptq.py,sha256=
+tico/quantization/algorithm/gptq/gptq.py,sha256=qXhVKjLEkg5DpOgRCExv3V-yy0yB1xTYauBBcK_yxRY,7035
 tico/quantization/algorithm/gptq/quant.py,sha256=Rl4wAOCmlE0U09BtNCDbccaSNohRHCNLwFi3zCqZfNo,5127
-tico/quantization/algorithm/gptq/quantizer.py,sha256=
+tico/quantization/algorithm/gptq/quantizer.py,sha256=Ios0lyhTfuClWgI0umbf0dIaWlhkKUs3GMVd0MPrJf0,12027
 tico/quantization/algorithm/gptq/utils.py,sha256=leGKayf-xbSjVwwAGTA5RsxUKrhDiklOQdlsLifjdrs,1811
 tico/quantization/algorithm/pt2e/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/quantization/algorithm/pt2e/quantizer.py,sha256=9K8SGwxi67DA8Hdwc_25ResJiSGLIMDkNyAwtQu3PGM,2673
@@ -83,6 +86,7 @@ tico/quantization/algorithm/smoothquant/quantizer.py,sha256=pvf6HwW7VzyNFhfEDGwG
 tico/quantization/algorithm/smoothquant/smooth_quant.py,sha256=fxCy4m-BsSjraciSVPFlPhgsOT46RjrOgczQGb7B9TA,11561
 tico/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
+tico/quantization/config/fpi_gptq.py,sha256=XoY9-56O13HUYVkawRUQ0-uOqIhq71q0jUAFsUtPZPI,994
 tico/quantization/config/gptq.py,sha256=O3NEPYMJdgMJQB--blw3WI8FGbK9nDlSqSo2ZHvNwb8,960
 tico/quantization/config/pt2e.py,sha256=vSfULljHEnypadUyo-zjVoPSbP8Y2eDzSD_kRTcv6bk,837
 tico/quantization/config/ptq.py,sha256=zbLQbuiEpO-qlDgyUYTZ3hkVxr3boq5TX0n0QTBHic4,4540
@@ -129,9 +133,9 @@ tico/quantization/wrapq/utils/metrics.py,sha256=ZnEQOd9fzDDxdXl32PFl3jMQv5ycz9nF
 tico/quantization/wrapq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
 tico/quantization/wrapq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/quantization/wrapq/wrappers/ptq_wrapper.py,sha256=6zcVZ-vVhPCvFHQw6UlN7iizElrIHNkpAraeMaA0DDU,2388
-tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=
+tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=2mpDljmROSIG3DI4TpNuy6gVEZ294aT1rKww-ZSI96o,4880
 tico/quantization/wrapq/wrappers/quant_module_base.py,sha256=SgyUlFYxDx39CAvcN2q4lsTedbEVPmetIigrllmvvD4,5915
-tico/quantization/wrapq/wrappers/registry.py,sha256=
+tico/quantization/wrapq/wrappers/registry.py,sha256=QJcOD9gEGB_DJowdTTqemcRDcYxQa4tHv2CDFgZDnA0,5168
 tico/quantization/wrapq/wrappers/fairseq/__init__.py,sha256=K4R7rbxHosx9LBLk2WKlL8gFuZTYTws41TW47AsSUPM,149
 tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py,sha256=d7ZieKiSbZ2ffkaLYMg2PJl1OyAxkKjB3OHKB4poxJs,9796
 tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py,sha256=JTCUDNEHYU5iOcbC_2mpuhvEoZqzTNIW3gPUZE1J7FE,17810
@@ -146,7 +150,7 @@ tico/quantization/wrapq/wrappers/llama/quant_mlp.py,sha256=I0EUJPnBOvaTnjT1Jk4N2
 tico/quantization/wrapq/wrappers/nn/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/quantization/wrapq/wrappers/nn/quant_layernorm.py,sha256=UoWWQaDqBY_bAeWRRsNl19LO331KQQLpZP9ACE-HyiU,6823
 tico/quantization/wrapq/wrappers/nn/quant_linear.py,sha256=y3exJX_Og8HIi0VdpvX4M9m8Voq0e0ndiX8G6DZflT8,2165
-tico/quantization/wrapq/wrappers/nn/quant_silu.py,sha256=
+tico/quantization/wrapq/wrappers/nn/quant_silu.py,sha256=jRbM2lCFjqAqQj3Gur4eiHs1eCoNtjejMd16VBhNZt8,1901
 tico/serialize/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/serialize/circle_graph.py,sha256=qvyul_HULoz7B_6RFKQ8s9RjEvMgPq-ynMVkZe8aqE4,12034
 tico/serialize/circle_mapping.py,sha256=c__AIHPi23lPugNJFolgMAKrw8j7gEeMaUQ1LAMSFnY,8542
@@ -263,9 +267,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
 tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
 tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
-tico-0.1.0.
-tico-0.1.0.
-tico-0.1.0.
-tico-0.1.0.
-tico-0.1.0.
-tico-0.1.0.
+tico-0.1.0.dev251123.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+tico-0.1.0.dev251123.dist-info/METADATA,sha256=IcEcsIP6XwyEOQnEe3qnmsaiiu8uaudtL97cuoZbXYk,9730
+tico-0.1.0.dev251123.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+tico-0.1.0.dev251123.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+tico-0.1.0.dev251123.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+tico-0.1.0.dev251123.dist-info/RECORD,,

{tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/LICENSE: File without changes
{tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/WHEEL: File without changes
{tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/entry_points.txt: File without changes
{tico-0.1.0.dev251102.dist-info → tico-0.1.0.dev251123.dist-info}/top_level.txt: File without changes