tico 0.1.0.dev251106__py3-none-any.whl → 0.1.0.dev251123__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
  ]

  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
- __version__ = "0.1.0.dev251106"
+ __version__ = "0.1.0.dev251123"

  MINIMUM_SUPPORTED_VERSION = "2.5.0"
  SECURE_TORCH_VERSION = "2.6.0"
tico/quantization/algorithm/fpi_gptq/__init__.py ADDED
@@ -0,0 +1 @@
+ # DO NOT REMOVE THIS FILE
tico/quantization/algorithm/fpi_gptq/fpi_gptq.py ADDED
@@ -0,0 +1,176 @@
+ # Copyright IST-DASLab. 2025. (commit: 2d65066). GitHub repository.
+ # Retrieved from https://github.com/IST-DASLab/gptq. Licensed under the
+ # Apache License 2.0.
+
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # https://github.com/IST-DASLab/gptq/blob/2d65066/gptq.py
+
+ import math
+ import time
+ from typing import Optional
+
+ import torch
+ import torch.nn as nn
+
+ from tico.quantization.algorithm.gptq.quant import quantize, Quantizer
+
+
+ def iterate_GPTQ(scale, zero, maxq, W, Hinv, max_num_of_iters=50):
+
+     cur_weights = W.clone()
+     mults = torch.pow(torch.diag(Hinv), -1)
+     Hinv_U = torch.triu(Hinv, diagonal=1)
+
+     init_weights = W.clone()
+     for _ in range(max_num_of_iters):
+         cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+         d_W = torch.mul((cur_weights - cur_Q), mults)
+         cur_weights = init_weights - torch.matmul(d_W, Hinv_U)
+         del d_W, cur_Q
+         d_W = cur_Q = None
+
+     del init_weights
+     init_weights = None
+
+     cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+     return cur_Q, cur_weights
+
+
+ class FPI_GPTQ:
+     def __init__(self, layer):
+         self.layer = layer
+         self.dev = self.layer.weight.device
+         W = layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
+         self.rows = W.shape[0]
+         self.columns = W.shape[1]
+         self.H: Optional[torch.Tensor] = torch.zeros(
+             (self.columns, self.columns), device=self.dev
+         )
+         self.nsamples = 0
+         self.quantizer: Quantizer = Quantizer()
+
+     def add_batch(self, inp, out):
+         if len(inp.shape) == 2:
+             inp = inp.unsqueeze(0)
+         tmp = inp.shape[0]
+         if isinstance(self.layer, nn.Linear) or isinstance(self.layer, nn.Conv1d):
+             if len(inp.shape) > 2:
+                 inp = inp.reshape((-1, inp.shape[-1]))
+             inp = inp.t()
+         if isinstance(self.layer, nn.Conv2d):
+             unfold = nn.Unfold(
+                 self.layer.kernel_size,
+                 dilation=self.layer.dilation,
+                 padding=self.layer.padding,
+                 stride=self.layer.stride,
+             )
+
+             inp = unfold(inp)
+             inp = inp.permute([1, 0, 2])
+             inp = inp.flatten(1)
+
+         self.H *= self.nsamples / (self.nsamples + tmp)
+         self.nsamples += tmp
+         inp = math.sqrt(2 / self.nsamples) * inp.float()
+         self.H += inp.matmul(inp.t())
+
+     def fasterquant(
+         self,
+         percdamp=0.01,
+         verbose=False,
+     ):
+         W = self.layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
+         W = W.float()
+         tick = time.time()
+         if not self.quantizer.ready():
+             self.quantizer.find_params(W, weight=True)
+
+         H = self.H
+         del self.H
+         assert isinstance(H, torch.Tensor)
+         dead = torch.diag(H) == 0
+         H[dead, dead] = 1
+         W[:, dead] = 0
+
+         # actorder
+         perm = torch.argsort(torch.diag(H), descending=True)
+         W = W[:, perm]
+         H = H[perm][:, perm]
+         invperm = torch.argsort(perm)
+
+         Q = torch.zeros_like(W)
+
+         damp = percdamp * torch.mean(torch.diag(H))
+         diag = torch.arange(self.columns, device=self.dev)
+         H[diag, diag] += damp
+         H = torch.linalg.cholesky(H)
+         assert isinstance(H, torch.Tensor)
+         H = torch.cholesky_inverse(H)
+         H = torch.linalg.cholesky(H, upper=True)
+         Hinv = H
+
+         Q, W = iterate_GPTQ(
+             self.quantizer.scale,
+             self.quantizer.zero,
+             self.quantizer.maxq,
+             W,
+             Hinv=Hinv,
+             max_num_of_iters=50,
+         )
+
+         if torch.cuda.is_available():
+             torch.cuda.synchronize()
+         if verbose:
+             print("time %.2f" % (time.time() - tick))
+             Losses = 0.5 * ((Q - W) / torch.diag(Hinv)) ** 2
+             print("error", torch.sum(Losses).item())
+
+         Q = Q[:, invperm]
+
+         if isinstance(self.layer, nn.Conv2d):
+             Q[:, dead] = quantize(
+                 self.layer.weight.flatten(1)[:, dead],
+                 self.quantizer.scale,
+                 self.quantizer.zero,
+                 self.quantizer.maxq,
+             )
+         else:
+             Q[:, dead] = quantize(
+                 self.layer.weight[:, dead],
+                 self.quantizer.scale,
+                 self.quantizer.zero,
+                 self.quantizer.maxq,
+             )
+
+         self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+             self.layer.weight.data.dtype
+         )
+
+     def free(self):
+         self.H = None
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
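
The fixed-point view of GPTQ that iterate_GPTQ implements can be tried end to end in a few lines. The sketch below is illustrative only and is not part of the package: rtn is a hypothetical round-to-nearest stand-in for tico's quantize(W, scale, zero, maxq), and the Hessian is built from random calibration data.

import torch

torch.manual_seed(0)

def rtn(w, step=0.1):
    # hypothetical stand-in for tico's quantize(w, scale, zero, maxq)
    return torch.round(w / step) * step

cols = 8
W = torch.randn(4, cols)                  # weights to quantize (rows x cols)
X = torch.randn(128, cols)                # fake calibration activations
H = 2.0 / X.shape[0] * X.t() @ X + 0.01 * torch.eye(cols)  # damped Hessian proxy

# Same factorization as fasterquant(): upper-Cholesky factor of H^-1
L = torch.linalg.cholesky(H)
Hinv = torch.linalg.cholesky(torch.cholesky_inverse(L), upper=True)

mults = 1.0 / torch.diag(Hinv)            # diag(Hinv)^-1, as in iterate_GPTQ
Hinv_U = torch.triu(Hinv, diagonal=1)     # strictly-upper part propagates the error

cur = W.clone()
for _ in range(50):                       # the fixed-point iteration
    Q = rtn(cur)
    cur = W - ((cur - Q) * mults) @ Hinv_U

print("final error:", torch.norm(rtn(cur) - W).item())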
tico/quantization/algorithm/fpi_gptq/quantizer.py ADDED
@@ -0,0 +1,179 @@
+ # Copyright (c) 2024 Intel Corporation
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Any, Dict
+
+ import torch
+ from tqdm.auto import tqdm
+
+ from tico.quantization.algorithm.fpi_gptq.fpi_gptq import FPI_GPTQ
+ from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
+ from tico.quantization.algorithm.gptq.utils import (
+     find_layers,
+     gather_single_batch_from_dict,
+     gather_single_batch_from_list,
+ )
+ from tico.quantization.config.fpi_gptq import FPIGPTQConfig
+ from tico.quantization.quantizer_registry import register_quantizer
+
+
+ @register_quantizer(FPIGPTQConfig)
+ class FPIGPTQQuantizer(GPTQQuantizer):
+     """
+     Quantizer for applying the Fixed Point Iteration GPTQ algorithm (FPIGPTQ).
+     This implementation follows the same steps as GPTQQuantizer.
+     It should produce results very close to reference GPTQ while running much faster on CUDA.
+     """
+
+     def __init__(self, config: FPIGPTQConfig):
+         super().__init__(config)
+
+     @torch.no_grad()
+     def convert(self, model):
+
+         # Restore original forwards (we no longer want to stop after the first layer)
+         assert self._orig_model_forward is not None
+         model.forward = self._orig_model_forward
+         assert (
+             self._first_layer_ref is not None and self._orig_layer_forward is not None
+         )
+         self._first_layer_ref.forward = self._orig_layer_forward
+
+         gptq_conf = self.config
+         assert isinstance(gptq_conf, FPIGPTQConfig)
+         # Disable use_cache during calibration
+         if hasattr(model, "config") and hasattr(model.config, "use_cache"):
+             orig_use_cache = model.config.use_cache
+             model.config.use_cache = False
+         else:
+             orig_use_cache = None
+
+         # Identify layers
+         if hasattr(model, "model"):
+             target_layers = model.model.layers
+         else:
+             target_layers = [model]
+
+         quantizers: Dict[str, Any] = {}
+         for l_idx, layer in enumerate(
+             tqdm(
+                 target_layers,
+                 desc="Quantizing layers",
+                 unit="layer",
+                 disable=not gptq_conf.show_progress,
+             )
+         ):
+             # 1) Identify quantizable submodules within the layer
+             full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+             # Filter out depthwise convolutions and the like
+             full = {
+                 key: full[key]
+                 for key in full.keys()
+                 if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+             }

+             sequential = [list(full.keys())]

+             # 2) Set up (as in GPTQ)
+             for names in sequential:
+                 subset = {n: full[n] for n in names}
+
+                 gptq: Dict[str, FPI_GPTQ] = {}
+                 for name in subset:
+                     gptq[name] = FPI_GPTQ(subset[name])
+                     gptq[name].quantizer.configure(
+                         bits=8, perchannel=True, sym=False, mse=False
+                     )
+
+                 # Hook to collect (inp, out) for GPTQ
+                 def add_batch(name):
+                     def _hook(_, inp, out):
+                         gptq[name].add_batch(inp[0].data, out.data)
+
+                     return _hook
+
+                 handles = []
+                 for name in subset:
+                     handles.append(subset[name].register_forward_hook(add_batch(name)))
+
+                 # Run layer forward over all cached batches to build Hessian/statistics
+                 batch_num = self.num_batches
+                 for batch_idx in tqdm(
+                     range(batch_num),
+                     desc=f"[L{l_idx}] collecting",
+                     leave=False,
+                     unit="batch",
+                     disable=not gptq_conf.show_progress,
+                 ):
+                     cache_args_batch = gather_single_batch_from_list(
+                         self.cache_args, batch_idx
+                     )
+                     cache_kwargs_batch = gather_single_batch_from_dict(
+                         self.cache_kwargs, batch_idx
+                     )
+                     layer(*cache_args_batch, **cache_kwargs_batch)
+
+                 # Remove handles
+                 for h in handles:
+                     h.remove()
+
+                 # 3) Quantize each submodule
+                 for name in subset:
+                     if gptq_conf.verbose:
+                         print(f"[Layer {l_idx}] {name} -> Quantizing ...")
+                     gptq[name].fasterquant(
+                         percdamp=0.01,
+                         verbose=gptq_conf.verbose,
+                     )
+                     quantizers[f"model.layers.{l_idx}.{name}"] = gptq[name].quantizer
+                     gptq[name].free()
+
+             # 4) After quantization, re-run the layer to produce outputs for the next layer
+             for batch_idx in tqdm(
+                 range(batch_num),
+                 desc=f"[L{l_idx}] re-forward",
+                 leave=False,
+                 unit="batch",
+                 disable=not gptq_conf.show_progress,
+             ):
+                 cache_args_batch = gather_single_batch_from_list(
+                     self.cache_args, batch_idx
+                 )
+                 cache_kwargs_batch = gather_single_batch_from_dict(
+                     self.cache_kwargs, batch_idx
+                 )
+                 outs = layer(*cache_args_batch, **cache_kwargs_batch)
+                 # LLaMA's decoder layer return type differs across Transformers versions:
+                 # some return a tuple (hidden_states, ...), others return just a tensor.
+                 # This line ensures we always take the first element when it's a tuple.
+                 outs = outs[0] if isinstance(outs, tuple) else outs
+                 # Update inputs for next iteration.
+                 self.cache_args[0][batch_idx] = outs
+
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+
+         # Restore the original cache configuration.
+         if orig_use_cache is not None:
+             model.config.use_cache = orig_use_cache
+
+         # Clear caches to free memory
+         self.cache_args.clear()
+         self.cache_kwargs.clear()
+         self.num_batches = 0
+
+         model.quantizers = quantizers
+
+         return model
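
The calibration step in convert() is driven entirely by forward hooks: each hooked submodule feeds its inputs into FPI_GPTQ.add_batch, which keeps a running average of 2/n * X Xᵀ. Below is a self-contained sketch of that pattern; it is illustrative only, DummyStats is a hypothetical stand-in for FPI_GPTQ, and a plain nn.Linear replaces a decoder submodule.

import math
import torch
import torch.nn as nn

class DummyStats:
    # hypothetical stand-in for FPI_GPTQ; only the Hessian accumulation is mirrored
    def __init__(self, layer):
        self.columns = layer.in_features
        self.H = torch.zeros(self.columns, self.columns)
        self.nsamples = 0

    def add_batch(self, inp, out):
        if inp.dim() == 2:
            inp = inp.unsqueeze(0)
        tmp = inp.shape[0]
        inp = inp.reshape(-1, inp.shape[-1]).t()     # (features, tokens)
        self.H *= self.nsamples / (self.nsamples + tmp)
        self.nsamples += tmp
        inp = math.sqrt(2 / self.nsamples) * inp.float()
        self.H += inp @ inp.t()                       # running 2/n * X X^T

layer = nn.Linear(16, 16)
stats = DummyStats(layer)
handle = layer.register_forward_hook(lambda m, i, o: stats.add_batch(i[0].data, o.data))
for _ in range(4):                                    # pretend calibration batches
    layer(torch.randn(2, 10, 16))
handle.remove()
print(stats.nsamples, stats.H.shape)                  # 8, torch.Size([16, 16])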
tico/quantization/algorithm/gptq/gptq.py CHANGED
@@ -181,6 +181,27 @@ class GPTQ:
          if actorder:
              Q = Q[:, invperm]

+         if isinstance(self.layer, nn.Conv2d):
+             if groupsize == -1:  # TODO support groupsize != -1
+                 Q[:, dead] = quantize(
+                     self.layer.weight.flatten(1)[:, dead],
+                     self.quantizer.scale,
+                     self.quantizer.zero,
+                     self.quantizer.maxq,
+                 )
+         else:
+             if groupsize == -1:  # TODO support groupsize != -1
+                 Q[:, dead] = quantize(
+                     self.layer.weight[:, dead],
+                     self.quantizer.scale,
+                     self.quantizer.zero,
+                     self.quantizer.maxq,
+                 )
+
+         assert (
+             groupsize == -1 or torch.sum(dead) == 0
+         )  # TODO `dead` elements should be RTN quantized for groupwise
+
          self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
              self.layer.weight.data.dtype
          )
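
The block added above handles columns whose Hessian diagonal is zero ("dead" columns, i.e. inputs that never fired during calibration): they carry no second-order information, so they are quantized round-to-nearest from the original weights instead of being solved by the GPTQ loop. A toy illustration of that fallback, not package code, with step as a hypothetical per-tensor scale:

import torch

H = torch.diag(torch.tensor([1.0, 0.0, 3.0, 0.0]))   # two dead columns
dead = torch.diag(H) == 0

W = torch.randn(5, 4)
step = 0.05                                           # hypothetical per-tensor scale
Q = W.clone()                                         # pretend GPTQ already filled Q
Q[:, dead] = torch.round(W[:, dead] / step) * step    # RTN fallback only for dead columns
print(dead.tolist())                                  # [False, True, False, True]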
tico/quantization/algorithm/gptq/quantizer.py CHANGED
@@ -193,7 +193,13 @@ class GPTQQuantizer(BaseQuantizer):
              )
          ):
              # 1) Identify quantizable submodules within the layer
-             full = find_layers(layer)
+             full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+             # Filter out depthwise convolutions and the like
+             full = {
+                 key: full[key]
+                 for key in full.keys()
+                 if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+             }
              sequential = [list(full.keys())]

              # 2) Set up GPTQ objects and gather stats
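
With this change find_layers also returns Conv2d modules, while grouped/depthwise convolutions are dropped by the comprehension. A minimal illustration of that filter's effect (illustrative only, using throwaway module names):

import torch.nn as nn

mods = {
    "fc": nn.Linear(8, 8),
    "pw": nn.Conv2d(8, 8, kernel_size=1),            # pointwise, groups=1 -> kept
    "dw": nn.Conv2d(8, 8, kernel_size=3, groups=8),  # depthwise -> filtered out
}
kept = {k: m for k, m in mods.items() if not isinstance(m, nn.Conv2d) or m.groups == 1}
print(sorted(kept))  # ['fc', 'pw']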
tico/quantization/config/fpi_gptq.py ADDED
@@ -0,0 +1,29 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from tico.quantization.config.gptq import GPTQConfig
+
+
+ class FPIGPTQConfig(GPTQConfig):
+     """
+     Configuration for FPIGPTQ (Fixed Point Iteration).
+     """
+
+     def __init__(self, verbose: bool = False, show_progress: bool = True):
+         self.verbose = verbose
+         self.show_progress = show_progress
+
+     @property
+     def name(self) -> str:
+         return "fpi_gptq"
tico/quantization/wrapq/wrappers/quant_elementwise.py CHANGED
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from typing import Callable, Optional
+ from typing import Any, Optional

  import torch
  import torch.nn as nn
@@ -31,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
      """

      # subclass must set this
-     FUNC: Callable[[torch.Tensor], torch.Tensor] | None = None
+     FUNC: Any = None

      def __init_subclass__(cls, **kwargs):
          super().__init_subclass__(**kwargs)
@@ -68,7 +68,7 @@ class QuantElementwise(QuantModuleBase):


  """
- Why `FUNC` is a `staticmethod`
+ Q1) Why `FUNC` is a `staticmethod`

  - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
    so the callable keeps the expected signature `Tensor -> Tensor`
@@ -85,27 +85,67 @@ Why `FUNC` is a `staticmethod`
    than an `nn.Module` instance that would appear in the module tree.

  - Small perf/alloc win: no bound-method objects are created on each call.
+
+ Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
+
+ - torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
+   Their type is `builtin_function_or_method`, not a Python `FunctionType`.
+   This causes `torch.export` (and FX tracing) to fail with:
+   "expected FunctionType, found builtin_function_or_method".
+
+ - By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
+   we convert it into a normal Python function object (`FunctionType`),
+   which satisfies export/tracing requirements.
+
+ - Functionally, this adds zero overhead and preserves semantics,
+   but makes the callable introspectable (has __code__, __name__, etc.)
+   and compatible with TorchDynamo / FX graph capture.
+
+ - It also keeps FUNC pure and stateless, ensuring the elementwise op
+   is represented as `call_function(_tanh)` in the traced graph
+   rather than a bound `call_method` or module attribute access.
  """

- # Sigmoid
+
+ def _relu(x: torch.Tensor) -> torch.Tensor:
+     return torch.relu(x)
+
+
+ def _tanh(x: torch.Tensor) -> torch.Tensor:
+     return torch.tanh(x)
+
+
+ def _sigmoid(x: torch.Tensor) -> torch.Tensor:
+     return torch.sigmoid(x)
+
+
+ def _gelu(x: torch.Tensor) -> torch.Tensor:
+     return torch.nn.functional.gelu(x)
+
+
  @register(nn.Sigmoid)
  class QuantSigmoid(QuantElementwise):
-     FUNC = staticmethod(torch.sigmoid)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _sigmoid(x)


- # Tanh
  @register(nn.Tanh)
  class QuantTanh(QuantElementwise):
-     FUNC = staticmethod(torch.tanh)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _tanh(x)


- # ReLU
  @register(nn.ReLU)
  class QuantReLU(QuantElementwise):
-     FUNC = staticmethod(torch.relu)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _relu(x)


- # GELU (approximate)
  @register(nn.GELU)
  class QuantGELU(QuantElementwise):
-     FUNC = staticmethod(torch.nn.functional.gelu)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _gelu(x)
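
The Q2 note above can be checked directly: torch's C-implemented ops are builtin_function_or_method objects, while the thin wrappers are ordinary Python functions that FX and torch.export can introspect. A quick illustration (not package code):

import types
import torch

def _tanh(x: torch.Tensor) -> torch.Tensor:
    return torch.tanh(x)

print(isinstance(torch.tanh, types.BuiltinFunctionType))   # True: C builtin
print(isinstance(_tanh, types.FunctionType))               # True: plain Python function
print(hasattr(torch.tanh, "__code__"), hasattr(_tanh, "__code__"))  # False True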
tico-0.1.0.dev251106.dist-info/METADATA → tico-0.1.0.dev251123.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tico
- Version: 0.1.0.dev251106
+ Version: 0.1.0.dev251123
  Summary: Convert exported Torch module to circle
  Home-page: UNKNOWN
  License: UNKNOWN
tico-0.1.0.dev251106.dist-info/RECORD → tico-0.1.0.dev251123.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- tico/__init__.py,sha256=zFizlBKVmXPeQuRPHlpsmWOePEoPowkPdrlRwoxPs4k,1883
+ tico/__init__.py,sha256=8a76TiclPFZrJ7BLy5dHGxBCtia-SHR-5HWcVqDiSE8,1883
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -51,10 +51,13 @@ tico/quantization/public_interface.py,sha256=YlE4re0HkkEDcq8IeXhPJUtveLIiDjAlChL
  tico/quantization/quantizer.py,sha256=FYNiqUqoH9vz1bda0I6yuKqJi2KdIfLEBd4EgeC-_t4,2357
  tico/quantization/quantizer_registry.py,sha256=MxVE1_hj1p8FjdAqkLzUhdez3Cqc-V25k6XKOcTkei0,2414
  tico/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/quantization/algorithm/fpi_gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/quantization/algorithm/fpi_gptq/fpi_gptq.py,sha256=fzXA2JXoSYClfGxEUoOEUFwFcqAiqg-RqwrFYLOaBjU,5388
+ tico/quantization/algorithm/fpi_gptq/quantizer.py,sha256=O04V01CyA7eU_pV08R8KGTiOhThbKf955KwwzagN-S8,6873
  tico/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
- tico/quantization/algorithm/gptq/gptq.py,sha256=x7wM9_OgOrcs6WmkVCDLn2bF7YuUAR_k6vLG2l593sk,6235
+ tico/quantization/algorithm/gptq/gptq.py,sha256=qXhVKjLEkg5DpOgRCExv3V-yy0yB1xTYauBBcK_yxRY,7035
  tico/quantization/algorithm/gptq/quant.py,sha256=Rl4wAOCmlE0U09BtNCDbccaSNohRHCNLwFi3zCqZfNo,5127
- tico/quantization/algorithm/gptq/quantizer.py,sha256=OvR9sHgosGYofwYcDhye84FBl55cNY7-UlfBt9gXbDY,11734
+ tico/quantization/algorithm/gptq/quantizer.py,sha256=Ios0lyhTfuClWgI0umbf0dIaWlhkKUs3GMVd0MPrJf0,12027
  tico/quantization/algorithm/gptq/utils.py,sha256=leGKayf-xbSjVwwAGTA5RsxUKrhDiklOQdlsLifjdrs,1811
  tico/quantization/algorithm/pt2e/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/algorithm/pt2e/quantizer.py,sha256=9K8SGwxi67DA8Hdwc_25ResJiSGLIMDkNyAwtQu3PGM,2673
@@ -83,6 +86,7 @@ tico/quantization/algorithm/smoothquant/quantizer.py,sha256=pvf6HwW7VzyNFhfEDGwG
  tico/quantization/algorithm/smoothquant/smooth_quant.py,sha256=fxCy4m-BsSjraciSVPFlPhgsOT46RjrOgczQGb7B9TA,11561
  tico/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
+ tico/quantization/config/fpi_gptq.py,sha256=XoY9-56O13HUYVkawRUQ0-uOqIhq71q0jUAFsUtPZPI,994
  tico/quantization/config/gptq.py,sha256=O3NEPYMJdgMJQB--blw3WI8FGbK9nDlSqSo2ZHvNwb8,960
  tico/quantization/config/pt2e.py,sha256=vSfULljHEnypadUyo-zjVoPSbP8Y2eDzSD_kRTcv6bk,837
  tico/quantization/config/ptq.py,sha256=zbLQbuiEpO-qlDgyUYTZ3hkVxr3boq5TX0n0QTBHic4,4540
@@ -129,7 +133,7 @@ tico/quantization/wrapq/utils/metrics.py,sha256=ZnEQOd9fzDDxdXl32PFl3jMQv5ycz9nF
  tico/quantization/wrapq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
  tico/quantization/wrapq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/wrapq/wrappers/ptq_wrapper.py,sha256=6zcVZ-vVhPCvFHQw6UlN7iizElrIHNkpAraeMaA0DDU,2388
- tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=trchhUknmZTcoCwVA62uzBP_mWuCjjuZjF0jb7TZpfA,3550
+ tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=2mpDljmROSIG3DI4TpNuy6gVEZ294aT1rKww-ZSI96o,4880
  tico/quantization/wrapq/wrappers/quant_module_base.py,sha256=SgyUlFYxDx39CAvcN2q4lsTedbEVPmetIigrllmvvD4,5915
  tico/quantization/wrapq/wrappers/registry.py,sha256=QJcOD9gEGB_DJowdTTqemcRDcYxQa4tHv2CDFgZDnA0,5168
  tico/quantization/wrapq/wrappers/fairseq/__init__.py,sha256=K4R7rbxHosx9LBLk2WKlL8gFuZTYTws41TW47AsSUPM,149
@@ -263,9 +267,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
- tico-0.1.0.dev251106.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
- tico-0.1.0.dev251106.dist-info/METADATA,sha256=cH9ZYH9ysDRREwJcSjMV1VkLFpNjoOSxPBChSzab-A0,9730
- tico-0.1.0.dev251106.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- tico-0.1.0.dev251106.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
- tico-0.1.0.dev251106.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
- tico-0.1.0.dev251106.dist-info/RECORD,,
+ tico-0.1.0.dev251123.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+ tico-0.1.0.dev251123.dist-info/METADATA,sha256=IcEcsIP6XwyEOQnEe3qnmsaiiu8uaudtL97cuoZbXYk,9730
+ tico-0.1.0.dev251123.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ tico-0.1.0.dev251123.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+ tico-0.1.0.dev251123.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+ tico-0.1.0.dev251123.dist-info/RECORD,,