tico 0.1.0.dev251020__py3-none-any.whl → 0.1.0.dev251022__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
  ]
 
  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
- __version__ = "0.1.0.dev251020"
+ __version__ = "0.1.0.dev251022"
 
  MINIMUM_SUPPORTED_VERSION = "2.5.0"
  SECURE_TORCH_VERSION = "2.6.0"
@@ -75,6 +75,8 @@ class PTQConfig(BaseConfig):
  default_observer: Type[ObserverBase] = MinMaxObserver
  default_qscheme: QScheme = QScheme.PER_TENSOR_ASYMM
  overrides: Mapping[str, Mapping[str, Any]] = field(default_factory=dict)
+ # If True, any module that cannot be wrapped will raise.
+ strict_wrap: bool = True
 
  @property
  def name(self) -> str:
@@ -110,7 +112,8 @@ class PTQConfig(BaseConfig):
  self.default_observer,
  default_qscheme=self.default_qscheme,
  overrides=sub_overrides,
+ strict_wrap=self.strict_wrap,
  )
 
  def __repr__(self):
- return f"PTQConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)})"
+ return f"PTQConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)}, strict_wrap={self.strict_wrap})"
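
The diff adds a `strict_wrap` knob to `PTQConfig` and threads it through `child()` and `__repr__`. A minimal sketch of how the flag might be used (assuming `PTQConfig` accepts keyword overrides for its dataclass-style fields, as the declarations above suggest):

```python
from tico.experimental.quantization.config.ptq import PTQConfig

# Default is strict: modules that cannot be wrapped raise during prepare().
strict_cfg = PTQConfig()
assert strict_cfg.strict_wrap is True

# Permissive: modules without a PTQ wrapper are silently left in floating point.
lenient_cfg = PTQConfig(strict_wrap=False)

# child() propagates strict_wrap to per-layer sub-configs,
# and __repr__ now reports the flag.
layer0_cfg = lenient_cfg.child("layer0")
print(lenient_cfg)  # ... strict_wrap=False
```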
@@ -22,16 +22,15 @@
 
  import argparse
  import sys
- from typing import Optional
 
  import torch
  import tqdm
  from datasets import load_dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ from tico.experimental.quantization import convert, prepare
  from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.ptq.utils.metrics import perplexity
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
 
  # Token-budget presets for activation calibration
  TOKENS: dict[str, int] = {
@@ -166,12 +165,7 @@ def main():
  # 2. Wrap every Transformer layer with PTQWrapper
  # ---------------------------------------------------------------------
  qcfg = PTQConfig() # all-uint8 defaults
-
- wrapped_layers = torch.nn.ModuleList()
- for idx, layer in enumerate(uint8_model.model.layers):
- layer_cfg = qcfg.child(f"layer{idx}")
- wrapped_layers.append(PTQWrapper(layer, qcfg=layer_cfg))
- uint8_model.model.layers = wrapped_layers
+ prepare(uint8_model, qcfg)
 
  # ---------------------------------------------------------------------
  # 3. Single-pass activation calibration
@@ -182,11 +176,7 @@
  )[:CALIB_TOKENS]
  ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
 
- # (a) switch every QuantModuleBase to CALIB mode
- for l in uint8_model.model.layers:
- l.enable_calibration()
-
- # (b) run inference to collect ranges
+ # Run inference to collect ranges
  iterator = range(0, ids.size(1) - 1, args.stride)
  if not args.no_tqdm:
  iterator = tqdm.tqdm(iterator, desc="Calibration")
@@ -194,9 +184,8 @@
  for i in iterator:
  uint8_model(ids[:, i : i + args.stride])
 
- # (c) freeze (scale, zero-point)
- for l in uint8_model.model.layers:
- l.freeze_qparams()
+ # Freeze (scale, zero-point)
+ convert(uint8_model)
 
  # -------------------------------------------------------------------------
  # 4. Evaluate perplexity
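
Across the example scripts in this release, the manual wrap/calibrate/freeze sequence (a PTQWrapper per layer, `enable_calibration()`, then `freeze_qparams()`) is replaced by the public `prepare`/`convert` entry points. A condensed sketch of the new flow, assuming a HuggingFace-style causal LM (the checkpoint name and calibration text below are placeholders, not taken from the diff):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from tico.experimental.quantization import convert, prepare
from tico.experimental.quantization.config.ptq import PTQConfig

name = "Maykeye/TinyLLama-v0"  # placeholder checkpoint
model = AutoModelForCausalLM.from_pretrained(name).eval()
tokenizer = AutoTokenizer.from_pretrained(name)

# 1. Wrap every supported layer and put observers into calibration mode.
prepare(model, PTQConfig())

# 2. Push representative text through the model so observers see activation ranges.
ids = tokenizer("some representative calibration text", return_tensors="pt").input_ids
with torch.no_grad():
    model(ids)

# 3. Freeze (scale, zero-point) and switch wrapped modules to quantized execution.
convert(model)
```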
@@ -38,6 +38,7 @@ import tqdm
  from datasets import load_dataset
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ from tico.experimental.quantization import convert, prepare
  from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.ptq.utils.introspection import (
  build_fqn_map,
@@ -177,18 +178,7 @@ def main():
  # -------------------------------------------------------------------------
  print("Wrapping layers with PTQWrapper …")
  qcfg = PTQConfig() # default: per-tensor UINT8
-
- new_layers = torch.nn.ModuleList()
- for idx, fp_layer in enumerate(model.model.layers):
- layer_cfg = qcfg.child(f"layer{idx}")
- q_layer = PTQWrapper(
- fp_layer,
- qcfg=layer_cfg,
- fp_name=m_to_fqn.get(fp_layer),
- )
- new_layers.append(q_layer)
-
- model.model.layers = new_layers # swap in quant wrappers
+ prepare(model, qcfg)
 
  # -------------------------------------------------------------------------
  # 3. Activation calibration plus FP-vs-UINT8 diffing
@@ -197,10 +187,6 @@ def main():
  calib_txt = " ".join(dataset["text"])[:CALIB_TOKENS]
  ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
 
- # (a) Enable CALIB mode on every QuantModuleBase
- for l in model.model.layers:
- l.enable_calibration()
-
  # Save reference FP activations before observers clamp/quantize
  save_handles, act_cache = save_fp_outputs(model)
 
@@ -216,11 +202,10 @@ def main():
  for h in save_handles:
  h.remove()
 
- # (b) Freeze (scale, zero-point) after calibration
- for l in model.model.layers:
- l.freeze_qparams()
+ # Freeze (scale, zero-point) after calibration
+ convert(model)
 
- # (c) Register diff hooks and measure per-layer deltas
+ # Register diff hooks and measure per-layer deltas
  cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
  # Use same inputs for comparison.
  with torch.no_grad():
@@ -29,13 +29,15 @@ import pathlib
  import torch
  import torch.nn as nn
 
+ from tico.experimental.quantization import convert, prepare
+ from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.evaluation.metric import compute_peir
  from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-
  from tico.experimental.quantization.ptq.mode import Mode
  from tico.experimental.quantization.ptq.wrappers.nn.quant_linear import QuantLinear
  from tico.utils.utils import SuppressWarning
 
+
  # -------------------------------------------------------------------------
  # 0. Define a toy model (1 Linear layer only)
  # -------------------------------------------------------------------------
@@ -60,20 +62,19 @@ fp32_layer = model.fc
  # -------------------------------------------------------------------------
  # 1. Replace the Linear with QuantLinear wrapper
  # -------------------------------------------------------------------------
- model.fc = QuantLinear(fp32_layer) # type: ignore[assignment]
- # model.fc = PTQWrapper(fp32_layer) (Wrapping helper class)
+ model.fc = prepare(fp32_layer, PTQConfig()) # type: ignore[assignment]
  qlayer = model.fc # alias for brevity
 
  # -------------------------------------------------------------------------
  # 2. Single-pass calibration (collect activation ranges)
  # -------------------------------------------------------------------------
- assert isinstance(qlayer, QuantLinear)
+ assert isinstance(qlayer.wrapped, QuantLinear)
  with torch.no_grad():
- qlayer.enable_calibration()
  for _ in range(16): # small toy batch
  x = torch.randn(4, 16) # (batch=4, features=16)
  _ = model(x)
- qlayer.freeze_qparams() # lock scales & zero-points
+
+ convert(qlayer)
 
  assert qlayer._mode is Mode.QUANT, "Quantization mode should be active now."
 
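
For a single module, `prepare()` returns the wrapper itself, and the updated example reaches the concrete quant module through a `.wrapped` attribute. A leaf-level sketch along the same lines (assuming, as the assertion above implies, that the wrapper exposes the inner `QuantLinear` as `.wrapped`):

```python
import torch
import torch.nn as nn

from tico.experimental.quantization import convert, prepare
from tico.experimental.quantization.config.ptq import PTQConfig
from tico.experimental.quantization.ptq.wrappers.nn.quant_linear import QuantLinear

fc = nn.Linear(16, 8)
qfc = prepare(fc, PTQConfig())           # leaf-level: returns the wrapper
assert isinstance(qfc.wrapped, QuantLinear)

with torch.no_grad():                    # a few batches to collect ranges
    for _ in range(8):
        qfc(torch.randn(4, 16))

convert(qfc)                             # freeze scale / zero-point
```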
@@ -17,9 +17,10 @@ import pathlib
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ from tico.experimental.quantization import convert, prepare
+ from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.evaluation.metric import compute_peir
  from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-
  from tico.experimental.quantization.ptq.mode import Mode
  from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
  QuantLlamaAttention,
@@ -34,12 +35,11 @@ tokenizer = AutoTokenizer.from_pretrained(name)
  # 1. Replace layer-0’s MLP with QuantLlamaMLP
  # -------------------------------------------------------------------------
  orig_attn = model.model.layers[0].self_attn
- model.model.layers[0].self_attn = QuantLlamaAttention(
- orig_attn
- ) # PTQWrapper(orig_attn) is also fine
+ model.model.layers[0].self_attn = prepare(orig_attn, PTQConfig())
  model.eval()
 
  attn_q = model.model.layers[0].self_attn # quant wrapper
+ assert isinstance(attn_q.wrapped, QuantLlamaAttention)
  rotary = model.model.rotary_emb
 
  # -------------------------------------------------------------------------
@@ -55,7 +55,6 @@ PROMPTS = [
  ]
 
  with torch.no_grad():
- attn_q.enable_calibration()
  for prompt in PROMPTS:
  ids = tokenizer(prompt, return_tensors="pt")
  embeds = model.model.embed_tokens(ids["input_ids"])
@@ -63,7 +62,8 @@ with torch.no_grad():
  S = cos_sin[0].shape[1]
  float_mask = torch.zeros(1, 1, S, S)
  _ = attn_q(embeds, cos_sin) # observers collect
- attn_q.freeze_qparams()
+
+ convert(attn_q)
 
  assert attn_q._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -31,6 +31,8 @@ import pathlib
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
+ from tico.experimental.quantization import convert, prepare
+ from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.evaluation.metric import compute_peir
  from tico.experimental.quantization.evaluation.utils import plot_two_outputs
  from tico.experimental.quantization.ptq.mode import Mode
@@ -50,12 +52,11 @@ rotary = model.model.rotary_emb # RoPE helper
  # 1. Swap in the quant wrapper
  # -------------------------------------------------------------------------
  fp32_layer = model.model.layers[0] # keep a reference for diff check
- model.model.layers[0] = QuantLlamaDecoderLayer(
- fp32_layer
- ) # PTQWrapper(fp32_layer) is also fine
+ model.model.layers[0] = prepare(fp32_layer, PTQConfig())
  model.eval()
 
  qlayer = model.model.layers[0] # alias for brevity
+ assert isinstance(qlayer.wrapped, QuantLlamaDecoderLayer)
 
  # -------------------------------------------------------------------------
  # 2. Single-pass calibration (gather activation ranges)
@@ -70,7 +71,6 @@ PROMPTS = [
  ]
 
  with torch.no_grad():
- qlayer.enable_calibration()
  for prompt in PROMPTS:
  ids = tokenizer(prompt, return_tensors="pt")
  hidden = model.model.embed_tokens(ids["input_ids"])
@@ -78,7 +78,8 @@ with torch.no_grad():
  S = pos[0].shape[1]
  attn_mask = torch.zeros(1, 1, S, S) # causal-mask placeholder
  _ = qlayer(hidden, attention_mask=attn_mask, position_embeddings=pos)
- qlayer.freeze_qparams()
+
+ convert(qlayer)
 
  assert qlayer._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -18,6 +18,7 @@ import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
  import tico
+ from tico.experimental.quantization import convert, prepare
  from tico.experimental.quantization.config.ptq import PTQConfig
  from tico.experimental.quantization.evaluation.metric import compute_peir
  from tico.experimental.quantization.evaluation.utils import plot_two_outputs
@@ -36,13 +37,13 @@ model.eval()
  # 1. Replace layer-0’s MLP with QuantLlamaMLP
  # -------------------------------------------------------------------------
  fp32_mlp = model.model.layers[0].mlp
- model.model.layers[0].mlp = QuantLlamaMLP(
- fp32_mlp,
- qcfg=PTQConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM),
- ) # PTQWrapper(fp32_mlp) is also fine
+ model.model.layers[0].mlp = prepare(
+ fp32_mlp, PTQConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM)
+ )
  model.eval()
 
  mlp_q = model.model.layers[0].mlp
+ assert isinstance(mlp_q.wrapped, QuantLlamaMLP)
 
  # -------------------------------------------------------------------------
  # 2. Single-pass calibration
@@ -57,13 +58,12 @@ PROMPTS = [
  ]
 
  with torch.no_grad():
- mlp_q.enable_calibration()
  for prompt in PROMPTS:
  enc = tokenizer(prompt, return_tensors="pt")
  emb = model.model.embed_tokens(enc["input_ids"])
  _ = mlp_q(emb)
 
- mlp_q.freeze_qparams()
+ convert(mlp_q)
 
  assert mlp_q._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -215,22 +215,8 @@ def main():
  # 4. Wrap every layer with PTQWrapper (activation UINT-8)
  # -------------------------------------------------------------------------
  print("Wrapping layers with PTQWrapper …")
- layers = q_m.model.layers
- if not isinstance(layers, (list, torch.nn.ModuleList)):
- raise TypeError(f"'model.layers' must be list/ModuleList, got {type(layers)}")
-
  qcfg = PTQConfig() # default: per-tensor UINT8
- wrapped = torch.nn.ModuleList()
- for idx, fp_layer in enumerate(layers):
- layer_cfg = qcfg.child(f"layer{idx}")
- wrapped.append(
- PTQWrapper(
- fp_layer,
- qcfg=layer_cfg,
- fp_name=m_to_fqn.get(fp_layer),
- )
- )
- q_m.model.layers = wrapped
+ prepare(q_m, qcfg)
 
  # -------------------------------------------------------------------------
  # 5. Single-pass activation calibration
@@ -242,11 +228,7 @@ def main():
  calib_txt = " ".join(dataset_train["text"])[:CALIB_TOKENS]
  train_ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
 
- # (a) Enable CALIB mode on every QuantModuleBase
- for l in q_m.model.layers:
- l.enable_calibration()
-
- # (b) Overwrite weight observers with GPTQ statistics
+ # Overwrite weight observers with GPTQ statistics
  if hasattr(q_m, "quantizers") and isinstance(q_m.quantizers, dict):
  inject_gptq_qparams(q_m, q_m.quantizers)
  else:
@@ -254,7 +236,7 @@ def main():
  "[Warn] q_m.quantizers not found or not a dict; skipping GPTQ qparam injection."
  )
 
- # (c) Forward passes to collect activation ranges
+ # Forward passes to collect activation ranges
  iterator = range(0, train_ids.size(1) - 1, args.stride)
  if not args.no_tqdm:
  iterator = tqdm.tqdm(iterator, desc="Act-calibration")
@@ -262,9 +244,8 @@ def main():
  for i in iterator:
  q_m(train_ids[:, i : i + args.stride])
 
- # (d) Freeze all Q-params (scale, zero-point)
- for l in q_m.model.layers:
- l.freeze_qparams()
+ # Freeze all Q-params (scale, zero-point)
+ convert(q_m)
 
  # -------------------------------------------------------------------------
  # 6. Evaluate perplexity on Wikitext-2
@@ -0,0 +1,181 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Any, Dict, Optional
+
+ import torch
+ import torch.nn as nn
+
+ from tico.experimental.quantization.config.ptq import PTQConfig
+
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+ from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
+ QuantModuleBase,
+ )
+ from tico.experimental.quantization.quantizer import BaseQuantizer
+ from tico.experimental.quantization.quantizer_registry import register_quantizer
+
+
+ @register_quantizer(PTQConfig)
+ class PTQQuantizer(BaseQuantizer):
+ """
+ Post-Training Quantization (PTQ) quantizer integrated with the public interface.
+
+ Features
+ --------
+ • Automatically wraps quantizable modules using PTQWrapper.
+ • Supports leaf-level (single-module) quantization (e.g., prepare(model.fc, PTQConfig())).
+ • Enforces strict wrapping if `strict_wrap=True`: raises NotImplementedError if
+ no quantizable module was found at any boundary.
+ • If `strict_wrap=False`, unquantizable modules are silently skipped.
+ """
+
+ def __init__(self, config: PTQConfig):
+ super().__init__(config)
+ self.qcfg: PTQConfig = config
+ self.strict_wrap: bool = bool(getattr(config, "strict_wrap", True))
+
+ @torch.no_grad()
+ def prepare(
+ self,
+ model: torch.nn.Module,
+ args: Optional[Any] = None,
+ kwargs: Optional[Dict[str, Any]] = None,
+ ):
+ # Wrap the tree (or single module) according to strictness policy
+ model = self._wrap_supported(model, self.qcfg)
+
+ # Switch all quant modules into calibration mode
+ if isinstance(model, QuantModuleBase):
+ model.enable_calibration()
+ for m in model.modules():
+ if isinstance(m, QuantModuleBase):
+ m.enable_calibration()
+ return model
+
+ @torch.no_grad()
+ def convert(self, model):
+ # Freeze qparams across the tree (QUANT mode)
+ if isinstance(model, QuantModuleBase):
+ model.freeze_qparams()
+ for m in model.modules():
+ if isinstance(m, QuantModuleBase):
+ m.freeze_qparams()
+ return model
+
+ def _wrap_supported(
+ self,
+ root: nn.Module,
+ qcfg: PTQConfig,
+ ) -> nn.Module:
+ """
+ Recursively attempt to wrap boundaries. Strictness is applied at every boundary.
+ """
+ assert not isinstance(root, QuantModuleBase), "The module is already wrapped."
+
+ # Case A: HuggingFace-style transformers: model.model.layers
+ lm = getattr(root, "model", None)
+ layers = getattr(lm, "layers", None) if isinstance(lm, nn.Module) else None
+ if isinstance(layers, nn.ModuleList):
+ new_list = nn.ModuleList()
+ for idx, layer in enumerate(layers):
+ child_scope = f"layer{idx}"
+ child_cfg = qcfg.child(child_scope)
+
+ # Enforce strictness at the child boundary
+ wrapped = self._try_wrap(
+ layer,
+ child_cfg,
+ fp_name=child_scope,
+ raise_on_fail=self.strict_wrap,
+ )
+ new_list.append(wrapped)
+ lm.layers = new_list # type: ignore[union-attr]
+ return root
+
+ # Case B: Containers
+ if isinstance(root, (nn.Sequential, nn.ModuleList)):
+ for i, child in enumerate(list(root)):
+ name = str(i)
+ child_cfg = qcfg.child(name)
+
+ wrapped = self._try_wrap(
+ child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
+ )
+ if wrapped is child:
+ assert not self.strict_wrap
+ wrapped = self._wrap_supported(wrapped, child_cfg)
+ root[i] = wrapped # type: ignore[index]
+
+ if isinstance(root, nn.ModuleDict):
+ for k, child in list(root.items()):
+ name = k
+ child_cfg = qcfg.child(name)
+
+ wrapped = self._try_wrap(
+ child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
+ )
+ if wrapped is child:
+ assert not self.strict_wrap
+ wrapped = self._wrap_supported(wrapped, child_cfg)
+ root[k] = wrapped # type: ignore[index]
+
+ # Case C: Leaf node
+ root_name = getattr(root, "_get_name", lambda: None)()
+ wrapped = self._try_wrap(
+ root, qcfg, fp_name=root_name, raise_on_fail=self.strict_wrap
+ )
+ if wrapped is not root:
+ return wrapped
+
+ assert not self.strict_wrap
+ # Case D: Named children
+ for name, child in list(root.named_children()):
+ child_cfg = qcfg.child(name)
+
+ wrapped = self._try_wrap(
+ child, child_cfg, fp_name=name, raise_on_fail=self.strict_wrap
+ )
+ if wrapped is child:
+ assert not self.strict_wrap
+ wrapped = self._wrap_supported(wrapped, child_cfg)
+ setattr(root, name, wrapped)
+
+ return root
+
+ def _try_wrap(
+ self,
+ module: nn.Module,
+ qcfg_for_child: PTQConfig,
+ *,
+ fp_name: Optional[str],
+ raise_on_fail: bool,
+ ) -> nn.Module:
+ """
+ Attempt to wrap a boundary with PTQWrapper.
+
+ Behavior:
+ • If PTQWrapper succeeds: return wrapped module.
+ • If PTQWrapper raises NotImplementedError:
+ - raise_on_fail=True -> re-raise (strict)
+ - raise_on_fail=False -> return original module (permissive)
+ """
+ try:
+ return PTQWrapper(module, qcfg=qcfg_for_child, fp_name=fp_name)
+ except NotImplementedError as e:
+ if raise_on_fail:
+ raise NotImplementedError(
+ f"PTQQuantizer: no quantization wrapper for {type(module).__name__}"
+ ) from e
+ return module
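
This new `PTQQuantizer` is registered for `PTQConfig`, so the `prepare`/`convert` calls in the examples above dispatch to it through the quantizer registry. A sketch of the strictness policy its docstring describes (the toy module is illustrative; it assumes no PTQ wrapper exists for `nn.SiLU`, while `nn.Linear` is supported as the `QuantLinear` example suggests):

```python
import torch.nn as nn

from tico.experimental.quantization import prepare
from tico.experimental.quantization.config.ptq import PTQConfig


class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(8, 8)
        self.act = nn.SiLU()  # assumed: no PTQ wrapper available for this module


# strict_wrap=True (default): the first boundary without a wrapper raises.
try:
    prepare(Toy(), PTQConfig())
except NotImplementedError as err:
    print("strict mode:", err)

# strict_wrap=False: unsupported modules are skipped and stay in floating point,
# while supported children (e.g. the Linear) are wrapped.
prepare(Toy(), PTQConfig(strict_wrap=False))
```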
@@ -32,7 +32,7 @@ def prepare(
  quant_config: BaseConfig,
  args: Optional[Any] = None,
  kwargs: Optional[Dict[str, Any]] = None,
- inplace: Optional[bool] = False,
+ inplace: Optional[bool] = True,
  ):
  """
  Prepare the model for quantization using the provided configuration.
@@ -68,7 +68,7 @@ def prepare(
  return model
 
 
- def convert(model, inplace: Optional[bool] = False):
+ def convert(model, inplace: Optional[bool] = True):
  """
  Convert the prepared model to a quantized model using the provided configuration.
 
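
Both public entry points now default to `inplace=True`, which is why the updated examples call `prepare(model, qcfg)` and `convert(model)` without using the return value. A short sketch of the two calling styles (assuming `inplace=False` still returns a separately prepared copy, as the previous default implied, and that `nn.Linear` children are supported wrapper targets):

```python
import torch
import torch.nn as nn

from tico.experimental.quantization import convert, prepare
from tico.experimental.quantization.config.ptq import PTQConfig

cfg = PTQConfig()
model = nn.Sequential(nn.Linear(16, 16), nn.Linear(16, 4))

# New default (inplace=True): the passed model is mutated and also returned.
prepare(model, cfg)
with torch.no_grad():
    model(torch.randn(2, 16))  # calibration pass
convert(model)

# Explicit opt-out: keep the original model untouched, work on the returned copy.
fp_model = nn.Sequential(nn.Linear(16, 16), nn.Linear(16, 4))
q_model = prepare(fp_model, cfg, inplace=False)
with torch.no_grad():
    q_model(torch.randn(2, 16))
q_model = convert(q_model, inplace=False)
```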
@@ -53,14 +53,17 @@ def get_quantizer(cfg: BaseConfig) -> BaseQuantizer:
  # Lazy import by naming convention
  name = getattr(cfg, "name", None)
  if name:
- try:
- importlib.import_module(
- f"tico.experimental.quantization.algorithm.{name}.quantizer"
- )
- except Exception as e:
- raise RuntimeError(
- f"Failed to import quantizer module for config name='{name}': {e}"
- )
+ if name == "ptq":
+ importlib.import_module(f"tico.experimental.quantization.ptq.quantizer")
+ else:
+ try:
+ importlib.import_module(
+ f"tico.experimental.quantization.algorithm.{name}.quantizer"
+ )
+ except Exception as e:
+ raise RuntimeError(
+ f"Failed to import quantizer module for config name='{name}': {e}"
+ )
 
  qcls = _lookup(cfg)
  if qcls is not None:
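
With this change, a config whose `name` is "ptq" is resolved by importing `tico.experimental.quantization.ptq.quantizer` directly instead of looking under `algorithm/`. A small sketch of the lookup path (assuming `get_quantizer` returns an instance of the registered quantizer class, per its `BaseQuantizer` return annotation):

```python
from tico.experimental.quantization.config.ptq import PTQConfig
from tico.experimental.quantization.quantizer_registry import get_quantizer

quantizer = get_quantizer(PTQConfig())   # lazily imports ...ptq.quantizer
print(type(quantizer).__name__)          # expected: PTQQuantizer
```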
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: tico
- Version: 0.1.0.dev251020
+ Version: 0.1.0.dev251022
  Summary: Convert exported Torch module to circle
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -1,4 +1,4 @@
- tico/__init__.py,sha256=aWoO8kl6EuxO5bIEEjtyUpu84ZfkIBOcDXEjg7bGFcw,1883
+ tico/__init__.py,sha256=EsX-4xNY4R5ooL4KCszR-_fjyRlWHKMUQ3Bk8MvMf_Q,1883
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -6,9 +6,9 @@ tico/config/factory.py,sha256=il0zqB6Lm5NX2LnG-TUhmiP9vVeZ_3TucJMorVZIodY,1324
  tico/config/v1.py,sha256=uB5d39fkmuBACwjBVGtdWb_HGXfXsvmw6nw64xZcC-8,1342
  tico/experimental/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/__init__.py,sha256=IaJPZegVJp0P3luutBo907Kp5sOJensE1Mm-XBG_jBs,122
- tico/experimental/quantization/public_interface.py,sha256=TGo3bTapwLA8KpsoEwBhuzI0LQUO6y3-sUM1VZvkLo8,4220
+ tico/experimental/quantization/public_interface.py,sha256=56lfDZIIC8pICyR0qAM1qGx6eAcJMbE-GARW8Bxkls0,4218
  tico/experimental/quantization/quantizer.py,sha256=pDTQGzR-BcQJeGZ7O4cXRQdCme4q_POpxHetwnv0bYg,2370
- tico/experimental/quantization/quantizer_registry.py,sha256=7wm2JcuPRribu7c8dCSZeYVcVqWQO1S-tHoinDDt11s,2345
+ tico/experimental/quantization/quantizer_registry.py,sha256=Jhiw2XMlLTn51hHim1okx3ozr-mjulR-SHiQRjYvfXc,2502
  tico/experimental/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/algorithm/gptq/gptq.py,sha256=Qn9b_2ki7B64DcVEY25NMkww3PdZ5EqYQQXfYhNDQ6I,5555
@@ -44,7 +44,7 @@ tico/experimental/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3
  tico/experimental/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
  tico/experimental/quantization/config/gptq.py,sha256=IUIEz5bLhsTXqoBCE1rfPec99zsRjwgpDbPW5YJqOPg,973
  tico/experimental/quantization/config/pt2e.py,sha256=9HCrraTGGZeKEN9puKV-ODi7ncV2Wjc3oe_JCO1D_Rs,850
- tico/experimental/quantization/config/ptq.py,sha256=uloDu-BKLJ9RussCmoLsw0Wq41zdk_iKsjdi_xqOn30,4431
+ tico/experimental/quantization/config/ptq.py,sha256=4QhoJ6hTJOb1MH88sa0vxKS6GdPdFIy1rFjEXsnarrk,4595
  tico/experimental/quantization/config/smoothquant.py,sha256=b92dz4-MiBbkaLzXb47bVoO29d2P416woFQUZ1wpO_s,1414
  tico/experimental/quantization/evaluation/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/evaluation/backend.py,sha256=CZL9rZOA0t8cH7PHp6u9l7dGqWNvTj9bKOvwo0PVul0,692
@@ -66,14 +66,15 @@ tico/experimental/quantization/ptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3oux
  tico/experimental/quantization/ptq/dtypes.py,sha256=xfCBtq6mQmUYRwsoFgII6gvRl1raQi0Inj9pznDuKwQ,2236
  tico/experimental/quantization/ptq/mode.py,sha256=lT-T8vIv8YWcwrjT7xXVhOw1g7aoAdh_3PWB-ptPKaI,1052
  tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXOq48uIqGy4TLDqGxY,1301
+ tico/experimental/quantization/ptq/quantizer.py,sha256=4hS6S4O8ytcUKoZdjTSmbUrCzWlCELsb_uz5ARnQucI,6647
  tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
- tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=2we7u4WgeK1NbYa31T2irUWE-RbTPUxRBdFtc9mPccY,8255
- tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=GviYQa3MZ0-nKTKRaRsPXRI24VtWvDL3uOhOqsqxniY,8169
- tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
- tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
- tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
- tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=poP-TFmsP_Iy3K6NEu6f8UmHInaCX3wUSFZWhhqoUCQ,4137
- tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=OqlaegX7ySR2PN6mSOZjcfKdxzrDe3gT_jBJg9HMzvM,10428
+ tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=Ap4s62eCMYkVwxdlysVRxdWg6AhLRv7Ib0wqytHqEmY,7774
+ tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=2e1zPFsB0XNTlgjiFrSgR-ezULCXCXHg-5fuSyEgPfI,7713
+ tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=Dox0oxj6HpTwrCQMEltch-7V4ZaKd72IfREizzwp-Ms,4548
+ tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=IsQ4hO_uTcjvVqYqmghzQj5FC98ZlYUpY3mOR3Y3H-A,4421
+ tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=DNkIoInx9tM1kHEQV-gKPiqRPAOZ3qUNroVq32D_5So,5843
+ tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=XfYv68anCV5NPeByo4tEyQYjVmC4KArG15sVyjThuTc,4149
+ tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=39c7T-JvUnCAeVa8e1A3FKZWBF32Mnvkv9Dms9LLvdU,9769
  tico/experimental/quantization/ptq/observers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
  tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
@@ -262,9 +263,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
- tico-0.1.0.dev251020.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
- tico-0.1.0.dev251020.dist-info/METADATA,sha256=wmUSIhW4DeNJR4XxRrNjjBelMOrCE_8o9LNErNo53Is,8455
- tico-0.1.0.dev251020.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- tico-0.1.0.dev251020.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
- tico-0.1.0.dev251020.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
- tico-0.1.0.dev251020.dist-info/RECORD,,
+ tico-0.1.0.dev251022.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+ tico-0.1.0.dev251022.dist-info/METADATA,sha256=3K8YDuxSO0M8dPuuo7-Fux6HX68W8DJ-Cz5v9R7sz1M,8455
+ tico-0.1.0.dev251022.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ tico-0.1.0.dev251022.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+ tico-0.1.0.dev251022.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+ tico-0.1.0.dev251022.dist-info/RECORD,,