tico 0.1.0.dev251030__py3-none-any.whl → 0.1.0.dev251110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tico might be problematic.

tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
  ]

  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
- __version__ = "0.1.0.dev251030"
+ __version__ = "0.1.0.dev251110"

  MINIMUM_SUPPORTED_VERSION = "2.5.0"
  SECURE_TORCH_VERSION = "2.6.0"
@@ -0,0 +1 @@
+ # DO NOT REMOVE THIS FILE
@@ -0,0 +1,161 @@
+ # Copyright IST-DASLab. 2025. (commit: 2d65066). GitHub repository.
+ # Retrieved from https://github.com/IST-DASLab/gptq. Licensed under the
+ # Apache License 2.0.
+
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # https://github.com/IST-DASLab/gptq/blob/2d65066/gptq.py
+
+ import math
+ import time
+ from typing import Optional
+
+ import torch
+ import torch.nn as nn
+
+ from tico.quantization.algorithm.gptq.quant import quantize, Quantizer
+
+
+ def iterate_GPTQ(scale, zero, maxq, W, Hinv, max_num_of_iters=50):
+
+     cur_weights = W.clone()
+     mults = torch.pow(torch.diag(Hinv), -1)
+     Hinv_U = torch.triu(Hinv, diagonal=1)
+
+     init_weights = W.clone()
+     for _ in range(max_num_of_iters):
+         cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+         d_W = torch.mul((cur_weights - cur_Q), mults)
+         cur_weights = init_weights - torch.matmul(d_W, Hinv_U)
+         del d_W, cur_Q
+         d_W = cur_Q = None
+
+     del init_weights
+     init_weights = None
+
+     cur_Q = quantize(cur_weights, scale, zero, maxq)
+
+     return cur_Q, cur_weights
+
+
+ class FPI_GPTQ:
+     def __init__(self, layer):
+         self.layer = layer
+         self.dev = self.layer.weight.device
+         W = layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
+         self.rows = W.shape[0]
+         self.columns = W.shape[1]
+         self.H: Optional[torch.Tensor] = torch.zeros(
+             (self.columns, self.columns), device=self.dev
+         )
+         self.nsamples = 0
+         self.quantizer: Quantizer = Quantizer()
+
+     def add_batch(self, inp, out):
+         if len(inp.shape) == 2:
+             inp = inp.unsqueeze(0)
+         tmp = inp.shape[0]
+         if isinstance(self.layer, nn.Linear) or isinstance(self.layer, nn.Conv1d):
+             if len(inp.shape) > 2:
+                 inp = inp.reshape((-1, inp.shape[-1]))
+             inp = inp.t()
+         if isinstance(self.layer, nn.Conv2d):
+             unfold = nn.Unfold(
+                 self.layer.kernel_size,
+                 dilation=self.layer.dilation,
+                 padding=self.layer.padding,
+                 stride=self.layer.stride,
+             )
+
+             inp = unfold(inp)
+             inp = inp.permute([1, 0, 2])
+             inp = inp.flatten(1)
+
+         self.H *= self.nsamples / (self.nsamples + tmp)
+         self.nsamples += tmp
+         inp = math.sqrt(2 / self.nsamples) * inp.float()
+         self.H += inp.matmul(inp.t())
+
+     def fasterquant(
+         self,
+         percdamp=0.01,
+         verbose=False,
+     ):
+         W = self.layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
+         W = W.float()
+         tick = time.time()
+         if not self.quantizer.ready():
+             self.quantizer.find_params(W, weight=True)
+
+         H = self.H
+         del self.H
+         assert isinstance(H, torch.Tensor)
+         dead = torch.diag(H) == 0
+         H[dead, dead] = 1
+         W[:, dead] = 0
+
+         # actorder
+         perm = torch.argsort(torch.diag(H), descending=True)
+         W = W[:, perm]
+         H = H[perm][:, perm]
+         invperm = torch.argsort(perm)
+
+         Q = torch.zeros_like(W)
+
+         damp = percdamp * torch.mean(torch.diag(H))
+         diag = torch.arange(self.columns, device=self.dev)
+         H[diag, diag] += damp
+         H = torch.linalg.cholesky(H)
+         assert isinstance(H, torch.Tensor)
+         H = torch.cholesky_inverse(H)
+         H = torch.linalg.cholesky(H, upper=True)
+         Hinv = H
+
+         Q, W = iterate_GPTQ(
+             self.quantizer.scale,
+             self.quantizer.zero,
+             self.quantizer.maxq,
+             W,
+             Hinv=Hinv,
+             max_num_of_iters=50,
+         )
+
+         if torch.cuda.is_available():
+             torch.cuda.synchronize()
+         if verbose:
+             print("time %.2f" % (time.time() - tick))
+             Losses = 0.5 * ((Q - W) / torch.diag(Hinv)) ** 2
+             print("error", torch.sum(Losses).item())
+
+         Q = Q[:, invperm]
+
+         self.layer.weight.data = Q.reshape(self.layer.weight.shape).to(
+             self.layer.weight.data.dtype
+         )
+
+     def free(self):
+         self.H = None
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
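
The FPI_GPTQ engine added above can be exercised on a single layer without the rest of the pipeline. A minimal sketch, assuming only what the new file itself provides; the layer size and the random calibration batches are illustrative, not part of the package:

import torch
import torch.nn as nn

from tico.quantization.algorithm.fpi_gptq.fpi_gptq import FPI_GPTQ

layer = nn.Linear(64, 32)
engine = FPI_GPTQ(layer)
# Same quantizer settings convert() uses: 8-bit, per-channel, asymmetric.
engine.quantizer.configure(bits=8, perchannel=True, sym=False, mse=False)

# Accumulate Hessian statistics from a few calibration batches, the same way
# FPIGPTQQuantizer.convert() does through forward hooks.
hook = layer.register_forward_hook(
    lambda _m, inp, out: engine.add_batch(inp[0].data, out.data)
)
for _ in range(16):
    layer(torch.randn(8, 64))
hook.remove()

# Run the fixed-point iteration and write the quantized weights back in place.
engine.fasterquant(percdamp=0.01, verbose=False)
engine.free()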
@@ -0,0 +1,179 @@
+ # Copyright (c) 2024 Intel Corporation
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Any, Dict
+
+ import torch
+ from tqdm.auto import tqdm
+
+ from tico.quantization.algorithm.fpi_gptq.fpi_gptq import FPI_GPTQ
+ from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
+ from tico.quantization.algorithm.gptq.utils import (
+     find_layers,
+     gather_single_batch_from_dict,
+     gather_single_batch_from_list,
+ )
+ from tico.quantization.config.fpi_gptq import FPIGPTQConfig
+ from tico.quantization.quantizer_registry import register_quantizer
+
+
+ @register_quantizer(FPIGPTQConfig)
+ class FPIGPTQQuantizer(GPTQQuantizer):
+     """
+     Quantizer for applying the Fixed Point Iteration GPTQ (FPI-GPTQ) algorithm.
+     It follows the same steps as GPTQQuantizer and should produce results very
+     close to reference GPTQ, but runs much faster on CUDA.
+     """
+
+     def __init__(self, config: FPIGPTQConfig):
+         super().__init__(config)
+
+     @torch.no_grad()
+     def convert(self, model):
+
+         # Restore original forwards (we no longer want to stop after first layer)
+         assert self._orig_model_forward is not None
+         model.forward = self._orig_model_forward
+         assert (
+             self._first_layer_ref is not None and self._orig_layer_forward is not None
+         )
+         self._first_layer_ref.forward = self._orig_layer_forward
+
+         gptq_conf = self.config
+         assert isinstance(gptq_conf, FPIGPTQConfig)
+         # Disable use_cache during calibration
+         if hasattr(model, "config") and hasattr(model.config, "use_cache"):
+             orig_use_cache = model.config.use_cache
+             model.config.use_cache = False
+         else:
+             orig_use_cache = None
+
+         # Identify layers
+         if hasattr(model, "model"):
+             target_layers = model.model.layers
+         else:
+             target_layers = [model]
+
+         quantizers: Dict[str, Any] = {}
+         for l_idx, layer in enumerate(
+             tqdm(
+                 target_layers,
+                 desc="Quantizing layers",
+                 unit="layer",
+                 disable=not gptq_conf.show_progress,
+             )
+         ):
+             # 1) Identify quantizable submodules within the layer
+             full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+             # filter out depthwise convolutions and the like
+             full = {
+                 key: full[key]
+                 for key in full.keys()
+                 if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+             }
+
+             sequential = [list(full.keys())]
+
+             # 2) Set up (as in GPTQ)
+             for names in sequential:
+                 subset = {n: full[n] for n in names}
+
+                 gptq: Dict[str, FPI_GPTQ] = {}
+                 for name in subset:
+                     gptq[name] = FPI_GPTQ(subset[name])
+                     gptq[name].quantizer.configure(
+                         bits=8, perchannel=True, sym=False, mse=False
+                     )
+
+                 # Hook to collect (inp, out) for GPTQ
+                 def add_batch(name):
+                     def _hook(_, inp, out):
+                         gptq[name].add_batch(inp[0].data, out.data)
+
+                     return _hook
+
+                 handles = []
+                 for name in subset:
+                     handles.append(subset[name].register_forward_hook(add_batch(name)))
+
+                 # Run layer forward over all cached batches to build Hessian/statistics
+                 batch_num = self.num_batches
+                 for batch_idx in tqdm(
+                     range(batch_num),
+                     desc=f"[L{l_idx}] collecting",
+                     leave=False,
+                     unit="batch",
+                     disable=not gptq_conf.show_progress,
+                 ):
+                     cache_args_batch = gather_single_batch_from_list(
+                         self.cache_args, batch_idx
+                     )
+                     cache_kwargs_batch = gather_single_batch_from_dict(
+                         self.cache_kwargs, batch_idx
+                     )
+                     layer(*cache_args_batch, **cache_kwargs_batch)
+
+                 # Remove handles
+                 for h in handles:
+                     h.remove()
+
+                 # 3) Quantize each submodule
+                 for name in subset:
+                     if gptq_conf.verbose:
+                         print(f"[Layer {l_idx}] {name} -> Quantizing ...")
+                     gptq[name].fasterquant(
+                         percdamp=0.01,
+                         verbose=gptq_conf.verbose,
+                     )
+                     quantizers[f"model.layers.{l_idx}.{name}"] = gptq[name].quantizer
+                     gptq[name].free()
+
+             # 4) After quantization, re-run the layer to produce outputs for the next layer
+             for batch_idx in tqdm(
+                 range(batch_num),
+                 desc=f"[L{l_idx}] re-forward",
+                 leave=False,
+                 unit="batch",
+                 disable=not gptq_conf.show_progress,
+             ):
+                 cache_args_batch = gather_single_batch_from_list(
+                     self.cache_args, batch_idx
+                 )
+                 cache_kwargs_batch = gather_single_batch_from_dict(
+                     self.cache_kwargs, batch_idx
+                 )
+                 outs = layer(*cache_args_batch, **cache_kwargs_batch)
+                 # LLaMA's decoder layer return type differs across Transformers versions:
+                 # some return a tuple (hidden_states, ...), others return just a tensor.
+                 # This line ensures we always take the first element when it's a tuple.
+                 outs = outs[0] if isinstance(outs, tuple) else outs
+                 # Update inputs for next iteration.
+                 self.cache_args[0][batch_idx] = outs
+
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+
+         # Restore the original cache configuration.
+         if orig_use_cache is not None:
+             model.config.use_cache = orig_use_cache
+
+         # Clear caches to free memory
+         self.cache_args.clear()
+         self.cache_kwargs.clear()
+         self.num_batches = 0
+
+         model.quantizers = quantizers
+
+         return model
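
For orientation, a rough usage sketch for the quantizer registered above. Only the constructor, convert(), and the register_quantizer(FPIGPTQConfig) decorator appear in this diff; the prepare/calibration half lives in the GPTQQuantizer base class, which is not shown here, so the commented-out calls below are assumptions about that interface rather than confirmed API:

from tico.quantization.algorithm.fpi_gptq.quantizer import FPIGPTQQuantizer
from tico.quantization.config.fpi_gptq import FPIGPTQConfig

config = FPIGPTQConfig(verbose=False, show_progress=True)
quantizer = FPIGPTQQuantizer(config)

# quantizer.prepare(model)           # assumed: caches per-layer inputs during calibration
# for batch in calibration_batches:  # assumed: user-provided calibration data
#     model(**batch)
# model = quantizer.convert(model)   # shown above: per-layer FPI-GPTQ pass
# print(model.quantizers.keys())     # e.g. "model.layers.0.self_attn.q_proj"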
@@ -36,6 +36,11 @@ class GPTQ:
          self.layer = layer
          self.dev = self.layer.weight.device
          W = layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
          self.rows = W.shape[0]
          self.columns = W.shape[1]
          self.H: Optional[torch.Tensor] = torch.zeros(
@@ -48,10 +53,22 @@ class GPTQ:
          if len(inp.shape) == 2:
              inp = inp.unsqueeze(0)
          tmp = inp.shape[0]
-         if isinstance(self.layer, nn.Linear):
-             if len(inp.shape) == 3:
+         if isinstance(self.layer, nn.Linear) or isinstance(self.layer, nn.Conv1d):
+             if len(inp.shape) > 2:
                  inp = inp.reshape((-1, inp.shape[-1]))
              inp = inp.t()
+         if isinstance(self.layer, nn.Conv2d):
+             unfold = nn.Unfold(
+                 self.layer.kernel_size,
+                 dilation=self.layer.dilation,
+                 padding=self.layer.padding,
+                 stride=self.layer.stride,
+             )
+
+             inp = unfold(inp)
+             inp = inp.permute([1, 0, 2])
+             inp = inp.flatten(1)
+
          self.H *= self.nsamples / (self.nsamples + tmp)
          self.nsamples += tmp
          inp = math.sqrt(2 / self.nsamples) * inp.float()
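
The Conv2d branch added above relies on the standard im2col identity: after nn.Unfold, a convolution is a plain matrix product between the flattened kernel and the unfolded input, which is what lets GPTQ treat conv weights like a linear layer. A small self-contained check of that identity (shapes chosen arbitrarily for illustration):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
x = torch.randn(2, 3, 16, 16)

unfold = nn.Unfold(
    conv.kernel_size, dilation=conv.dilation, padding=conv.padding, stride=conv.stride
)
cols = unfold(x)            # (N, C*kh*kw, L) = (2, 27, 256)
W = conv.weight.flatten(1)  # (out_channels, C*kh*kw) = (8, 27)

y_ref = conv(x).flatten(2)  # (2, 8, 256)
y_mat = W @ cols            # broadcasts to a batched matmul over N
assert torch.allclose(y_ref, y_mat, atol=1e-4)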
@@ -67,6 +84,10 @@ class GPTQ:
          verbose=False,
      ):
          W = self.layer.weight.data.clone()
+         if isinstance(self.layer, nn.Conv2d):
+             W = W.flatten(1)
+         if isinstance(self.layer, nn.Conv1d):
+             W = W.t()
          W = W.float()
          tick = time.time()
          if not self.quantizer.ready():
@@ -193,7 +193,13 @@ class GPTQQuantizer(BaseQuantizer):
              )
          ):
              # 1) Identify quantizable submodules within the layer
-             full = find_layers(layer)
+             full = find_layers(layer, layers=[torch.nn.Linear, torch.nn.Conv2d])
+             # filter out depthwise convolutions and the like
+             full = {
+                 key: full[key]
+                 for key in full.keys()
+                 if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
+             }
              sequential = [list(full.keys())]

              # 2) Set up GPTQ objects and gather stats
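
The filter added above keeps nn.Linear and ordinary nn.Conv2d modules but drops grouped convolutions, presumably because their flattened weights would not line up with the unfold-based Hessian built in add_batch. A small illustration of which modules survive the dict comprehension (the module names are made up for the example):

import torch

full = {
    "proj": torch.nn.Linear(16, 16),
    "conv": torch.nn.Conv2d(8, 8, 3, padding=1),               # groups == 1 -> kept
    "dw_conv": torch.nn.Conv2d(8, 8, 3, padding=1, groups=8),  # depthwise -> dropped
}
full = {
    key: full[key]
    for key in full.keys()
    if not isinstance(full[key], torch.nn.Conv2d) or full[key].groups == 1
}
assert sorted(full) == ["conv", "proj"]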
@@ -0,0 +1,29 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from tico.quantization.config.gptq import GPTQConfig
+
+
+ class FPIGPTQConfig(GPTQConfig):
+     """
+     Configuration for FPIGPTQ (Fixed Point Iteration GPTQ).
+     """
+
+     def __init__(self, verbose: bool = False, show_progress: bool = True):
+         self.verbose = verbose
+         self.show_progress = show_progress
+
+     @property
+     def name(self) -> str:
+         return "fpi_gptq"
@@ -19,14 +19,15 @@ import torch.nn as nn

  from tico.quantization.config.ptq import PTQConfig
  from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
- from tico.quantization.wrapq.wrappers.registry import register
+ from tico.quantization.wrapq.wrappers.registry import try_register


- @register(nn.SiLU)
+ @try_register("torch.nn.SiLU", "transformers.activations.SiLUActivation")
  class QuantSiLU(QuantModuleBase):
      """
-     QuantSiLU — drop-in replacement for nn.SiLU that quantizes
-     both intermediate tensors:
+     QuantSiLU — drop-in quantized implementation of the SiLU operation.
+
+     This module quantizes both intermediate tensors:
        • s = sigmoid(x) (logistic)
        • y = x * s (mul)
      """
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from typing import Callable, Optional
+ from typing import Any, Optional

  import torch
  import torch.nn as nn
@@ -31,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
      """

      # subclass must set this
-     FUNC: Callable[[torch.Tensor], torch.Tensor] | None = None
+     FUNC: Any = None

      def __init_subclass__(cls, **kwargs):
          super().__init_subclass__(**kwargs)
@@ -68,7 +68,7 @@ class QuantElementwise(QuantModuleBase):


  """
- Why `FUNC` is a `staticmethod`
+ Q1) Why `FUNC` is a `staticmethod`

  - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
    so the callable keeps the expected signature `Tensor -> Tensor`
@@ -85,27 +85,67 @@ Why `FUNC` is a `staticmethod`
    than an `nn.Module` instance that would appear in the module tree.

  - Small perf/alloc win: no bound-method objects are created on each call.
+
+ Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
+
+ - torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
+   Their type is `builtin_function_or_method`, not a Python `FunctionType`.
+   This causes `torch.export` (and FX tracing) to fail with:
+   "expected FunctionType, found builtin_function_or_method".
+
+ - By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
+   we convert it into a normal Python function object (`FunctionType`),
+   which satisfies export/tracing requirements.
+
+ - Functionally, this adds zero overhead and preserves semantics,
+   but makes the callable introspectable (has __code__, __name__, etc.)
+   and compatible with TorchDynamo / FX graph capture.
+
+ - It also keeps FUNC pure and stateless, ensuring the elementwise op
+   is represented as `call_function(_tanh)` in the traced graph
+   rather than a bound `call_method` or module attribute access.
  """

- # Sigmoid
+
+ def _relu(x: torch.Tensor) -> torch.Tensor:
+     return torch.relu(x)
+
+
+ def _tanh(x: torch.Tensor) -> torch.Tensor:
+     return torch.tanh(x)
+
+
+ def _sigmoid(x: torch.Tensor) -> torch.Tensor:
+     return torch.sigmoid(x)
+
+
+ def _gelu(x: torch.Tensor) -> torch.Tensor:
+     return torch.nn.functional.gelu(x)
+
+
  @register(nn.Sigmoid)
  class QuantSigmoid(QuantElementwise):
-     FUNC = staticmethod(torch.sigmoid)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _sigmoid(x)


- # Tanh
  @register(nn.Tanh)
  class QuantTanh(QuantElementwise):
-     FUNC = staticmethod(torch.tanh)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _tanh(x)


- # ReLU
  @register(nn.ReLU)
  class QuantReLU(QuantElementwise):
-     FUNC = staticmethod(torch.relu)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _relu(x)


- # GELU (approximate)
  @register(nn.GELU)
  class QuantGELU(QuantElementwise):
-     FUNC = staticmethod(torch.nn.functional.gelu)
+     @staticmethod
+     def FUNC(x: torch.Tensor) -> torch.Tensor:
+         return _gelu(x)
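
The type distinction described in Q2 is easy to verify; this is exactly the property the thin wrappers above restore:

import types

import torch

def _tanh(x: torch.Tensor) -> torch.Tensor:
    return torch.tanh(x)

print(type(torch.tanh))                            # <class 'builtin_function_or_method'>
print(isinstance(torch.tanh, types.FunctionType))  # False
print(isinstance(_tanh, types.FunctionType))       # True -> acceptable to torch.export / FX
print(_tanh.__name__, hasattr(_tanh, "__code__"))  # introspectable like any Python function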
@@ -23,14 +23,17 @@ _WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
  _IMPORT_ONCE = False
  _CORE_MODULES = (
      "tico.quantization.wrapq.wrappers.quant_elementwise",
+     ## nn ##
      "tico.quantization.wrapq.wrappers.nn.quant_layernorm",
      "tico.quantization.wrapq.wrappers.nn.quant_linear",
+     # This includes not only `nn.SiLU` but also `SiLUActivation` from transformers
+     # as they are the same operation.
      "tico.quantization.wrapq.wrappers.nn.quant_silu",
-     # llama
+     ## llama ##
      "tico.quantization.wrapq.wrappers.llama.quant_attn",
      "tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
      "tico.quantization.wrapq.wrappers.llama.quant_mlp",
-     # fairseq
+     ## fairseq ##
      "tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
      "tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
      "tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tico
- Version: 0.1.0.dev251030
+ Version: 0.1.0.dev251110
  Summary: Convert exported Torch module to circle
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -319,6 +319,9 @@ If you want to test them locally, you can do so by navigating to each model dire
  $ pip install -r test/modules/model/<model_name>/requirements.txt
  # Run test for a single model
  $ ./ccex test -m <model_name>
+ # Run models whose names contain "Llama" (e.g., Llama, LlamaDecoderLayer, LlamaWithGQA, etc.)
+ # Note that you should use quotes for the wildcard (*) pattern
+ $ ./ccex test -m "Llama*"
  ```

  For example, to run a single model
@@ -1,4 +1,4 @@
- tico/__init__.py,sha256=dAU9qv22Efzxz-X4kM1bjCz0qu8tD4CKeWLTByNx-yo,1883
+ tico/__init__.py,sha256=yv29XjNou3kzy3LQIjZ859r_tOdEV8yXazLencxPkWw,1883
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -51,10 +51,13 @@ tico/quantization/public_interface.py,sha256=YlE4re0HkkEDcq8IeXhPJUtveLIiDjAlChL
  tico/quantization/quantizer.py,sha256=FYNiqUqoH9vz1bda0I6yuKqJi2KdIfLEBd4EgeC-_t4,2357
  tico/quantization/quantizer_registry.py,sha256=MxVE1_hj1p8FjdAqkLzUhdez3Cqc-V25k6XKOcTkei0,2414
  tico/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/quantization/algorithm/fpi_gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
+ tico/quantization/algorithm/fpi_gptq/fpi_gptq.py,sha256=jCbrgH65h22Z5Mjr5raK5U1Vuosn7Cg8MUHFC87aez0,4906
+ tico/quantization/algorithm/fpi_gptq/quantizer.py,sha256=O04V01CyA7eU_pV08R8KGTiOhThbKf955KwwzagN-S8,6873
  tico/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
- tico/quantization/algorithm/gptq/gptq.py,sha256=HkuKv_UWs0xEdbj7zEP-65QPEtI_varmvAORFstyTic,5542
+ tico/quantization/algorithm/gptq/gptq.py,sha256=x7wM9_OgOrcs6WmkVCDLn2bF7YuUAR_k6vLG2l593sk,6235
  tico/quantization/algorithm/gptq/quant.py,sha256=Rl4wAOCmlE0U09BtNCDbccaSNohRHCNLwFi3zCqZfNo,5127
- tico/quantization/algorithm/gptq/quantizer.py,sha256=OvR9sHgosGYofwYcDhye84FBl55cNY7-UlfBt9gXbDY,11734
+ tico/quantization/algorithm/gptq/quantizer.py,sha256=Ios0lyhTfuClWgI0umbf0dIaWlhkKUs3GMVd0MPrJf0,12027
  tico/quantization/algorithm/gptq/utils.py,sha256=leGKayf-xbSjVwwAGTA5RsxUKrhDiklOQdlsLifjdrs,1811
  tico/quantization/algorithm/pt2e/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/algorithm/pt2e/quantizer.py,sha256=9K8SGwxi67DA8Hdwc_25ResJiSGLIMDkNyAwtQu3PGM,2673
@@ -83,6 +86,7 @@ tico/quantization/algorithm/smoothquant/quantizer.py,sha256=pvf6HwW7VzyNFhfEDGwG
  tico/quantization/algorithm/smoothquant/smooth_quant.py,sha256=fxCy4m-BsSjraciSVPFlPhgsOT46RjrOgczQGb7B9TA,11561
  tico/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
+ tico/quantization/config/fpi_gptq.py,sha256=XoY9-56O13HUYVkawRUQ0-uOqIhq71q0jUAFsUtPZPI,994
  tico/quantization/config/gptq.py,sha256=O3NEPYMJdgMJQB--blw3WI8FGbK9nDlSqSo2ZHvNwb8,960
  tico/quantization/config/pt2e.py,sha256=vSfULljHEnypadUyo-zjVoPSbP8Y2eDzSD_kRTcv6bk,837
  tico/quantization/config/ptq.py,sha256=zbLQbuiEpO-qlDgyUYTZ3hkVxr3boq5TX0n0QTBHic4,4540
@@ -129,9 +133,9 @@ tico/quantization/wrapq/utils/metrics.py,sha256=ZnEQOd9fzDDxdXl32PFl3jMQv5ycz9nF
  tico/quantization/wrapq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
  tico/quantization/wrapq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/wrapq/wrappers/ptq_wrapper.py,sha256=6zcVZ-vVhPCvFHQw6UlN7iizElrIHNkpAraeMaA0DDU,2388
- tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=trchhUknmZTcoCwVA62uzBP_mWuCjjuZjF0jb7TZpfA,3550
+ tico/quantization/wrapq/wrappers/quant_elementwise.py,sha256=2mpDljmROSIG3DI4TpNuy6gVEZ294aT1rKww-ZSI96o,4880
  tico/quantization/wrapq/wrappers/quant_module_base.py,sha256=SgyUlFYxDx39CAvcN2q4lsTedbEVPmetIigrllmvvD4,5915
- tico/quantization/wrapq/wrappers/registry.py,sha256=1rH28O7aWrp-uIFL7exa6rfdyEHeupzXuMZhNTW2i2k,5030
+ tico/quantization/wrapq/wrappers/registry.py,sha256=QJcOD9gEGB_DJowdTTqemcRDcYxQa4tHv2CDFgZDnA0,5168
  tico/quantization/wrapq/wrappers/fairseq/__init__.py,sha256=K4R7rbxHosx9LBLk2WKlL8gFuZTYTws41TW47AsSUPM,149
  tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py,sha256=d7ZieKiSbZ2ffkaLYMg2PJl1OyAxkKjB3OHKB4poxJs,9796
  tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py,sha256=JTCUDNEHYU5iOcbC_2mpuhvEoZqzTNIW3gPUZE1J7FE,17810
@@ -146,7 +150,7 @@ tico/quantization/wrapq/wrappers/llama/quant_mlp.py,sha256=I0EUJPnBOvaTnjT1Jk4N2
  tico/quantization/wrapq/wrappers/nn/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/quantization/wrapq/wrappers/nn/quant_layernorm.py,sha256=UoWWQaDqBY_bAeWRRsNl19LO331KQQLpZP9ACE-HyiU,6823
  tico/quantization/wrapq/wrappers/nn/quant_linear.py,sha256=y3exJX_Og8HIi0VdpvX4M9m8Voq0e0ndiX8G6DZflT8,2165
- tico/quantization/wrapq/wrappers/nn/quant_silu.py,sha256=6inKWfcVTlXFsnTX_6DdIChME3x0jL_urGbONjydMqw,1810
+ tico/quantization/wrapq/wrappers/nn/quant_silu.py,sha256=jRbM2lCFjqAqQj3Gur4eiHs1eCoNtjejMd16VBhNZt8,1901
  tico/serialize/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/serialize/circle_graph.py,sha256=qvyul_HULoz7B_6RFKQ8s9RjEvMgPq-ynMVkZe8aqE4,12034
  tico/serialize/circle_mapping.py,sha256=c__AIHPi23lPugNJFolgMAKrw8j7gEeMaUQ1LAMSFnY,8542
@@ -263,9 +267,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
- tico-0.1.0.dev251030.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
- tico-0.1.0.dev251030.dist-info/METADATA,sha256=c4rgMWNJZ7e9ZKYhV5hfEIS2eKBjrBo7MxZpbSX5BU0,9548
- tico-0.1.0.dev251030.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- tico-0.1.0.dev251030.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
- tico-0.1.0.dev251030.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
- tico-0.1.0.dev251030.dist-info/RECORD,,
+ tico-0.1.0.dev251110.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+ tico-0.1.0.dev251110.dist-info/METADATA,sha256=PqsFSw-looDSissCbVprESCWC_nNUiZ8F29ahdpB3PM,9730
+ tico-0.1.0.dev251110.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ tico-0.1.0.dev251110.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+ tico-0.1.0.dev251110.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+ tico-0.1.0.dev251110.dist-info/RECORD,,