tico 0.1.0.dev250826__py3-none-any.whl → 0.1.0.dev250827__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
29
29
  ]
30
30
 
31
31
  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
32
- __version__ = "0.1.0.dev250826"
32
+ __version__ = "0.1.0.dev250827"
33
33
 
34
34
  MINIMUM_SUPPORTED_VERSION = "2.5.0"
35
35
  SECURE_TORCH_VERSION = "2.6.0"
@@ -0,0 +1,87 @@
1
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pathlib
16
+
17
+ import torch
18
+ from transformers import AutoModelForCausalLM, AutoTokenizer
19
+
20
+ import tico
21
+ from tico.experimental.quantization.evaluation.metric import compute_peir
22
+ from tico.experimental.quantization.evaluation.utils import plot_two_outputs
23
+ from tico.experimental.quantization.ptq.dtypes import INT16
24
+ from tico.experimental.quantization.ptq.mode import Mode
25
+ from tico.experimental.quantization.ptq.qscheme import QScheme
26
+ from tico.experimental.quantization.ptq.quant_config import QuantConfig
27
+ from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
28
+ from tico.utils.utils import SuppressWarning
29
+
30
# Example: quantize the MLP of decoder layer 0 of a small Llama checkpoint,
# compare the quantized output with the original FP32 output, and export the
# quantized block as a Circle model.

# Model identifier passed to both `from_pretrained` calls below.
name = "Maykeye/TinyLLama-v0"
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)
model.eval()

# -------------------------------------------------------------------------
# 1. Replace layer-0’s MLP with QuantLlamaMLP
# -------------------------------------------------------------------------
# Keep a handle on the original module: it is the FP32 baseline in step 3.
fp32_mlp = model.model.layers[0].mlp
model.model.layers[0].mlp = QuantLlamaMLP(
    fp32_mlp,
    qcfg=QuantConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM),
)  # PTQWrapper(fp32_mlp) is also fine
# Called again so the freshly inserted wrapper is switched to eval mode too.
model.eval()

mlp_q = model.model.layers[0].mlp

# -------------------------------------------------------------------------
# 2. Calibration
# -------------------------------------------------------------------------
with torch.no_grad():
    mlp_q.enable_calibration()

    # The calibration batch is the same on every pass, so tokenize and embed
    # it once instead of repeating that work inside the loop.
    prompts = ["hello tinyllama "] * 8
    enc = tokenizer(prompts, return_tensors="pt")
    emb = model.model.embed_tokens(enc["input_ids"])
    for _ in range(16):
        _ = mlp_q(emb)

    mlp_q.freeze_qparams()

assert mlp_q._mode is Mode.QUANT, "Quantization mode should be active now."

# -------------------------------------------------------------------------
# 3. Quick diff check (INT-sim vs FP32)
# -------------------------------------------------------------------------
with torch.no_grad():
    ids = tokenizer("quant all tensors!", return_tensors="pt")
    emb = model.model.embed_tokens(ids["input_ids"])
    int16 = mlp_q(emb)  # INT-sim
    fp32 = fp32_mlp(emb)  # baseline reference

print("┌───────────── Quantization Error Summary ─────────────")
print(f"│ Mean |diff|: {(int16 - fp32).abs().mean().item():.6f}")
print(f"│ PEIR : {compute_peir(fp32, int16) * 100:.6f} %")
print("└──────────────────────────────────────────────────────")
print(plot_two_outputs(fp32, int16))

# -------------------------------------------------------------------------
# 4. Export the quantized block
# -------------------------------------------------------------------------
save_path = pathlib.Path("mlp.q.circle")
# Random example input; the last dimension is the model's hidden size.
example_in = (torch.randn(1, 1, model.config.hidden_size),)

with SuppressWarning(UserWarning, ".*"):
    cm = tico.convert(mlp_q, example_in)
cm.save(save_path)

print(f"Quantized Circle model saved to {save_path.resolve()}")
@@ -0,0 +1,3 @@
1
+ from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
2
+
3
+ __all__ = ["QuantLlamaMLP"]
@@ -0,0 +1,98 @@
1
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+
20
+ from tico.experimental.quantization.ptq.quant_config import QuantConfig
21
+ from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
22
+ from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
+ QuantModuleBase,
24
+ )
25
+ from tico.experimental.quantization.ptq.wrappers.registry import try_register
26
+
27
+
28
@try_register("transformers.models.llama.modeling_llama.LlamaMLP")
class QuantLlamaMLP(QuantModuleBase):
    """PTQ wrapper for a gated MLP block (registered for ``LlamaMLP``).

    The ``gate_proj``, ``up_proj``, ``down_proj`` and ``act_fn`` sub-modules
    of the floating-point MLP are each wrapped in a ``PTQWrapper``. Two local
    observers are added: ``act_in`` for the block input and ``mul`` for the
    element-wise product of the activated gate and the up projection.

    Args:
        mlp_fp: Floating-point MLP module. It must expose ``gate_proj``,
            ``up_proj``, ``down_proj`` and ``act_fn`` as ``nn.Module``
            attributes.
        qcfg: Optional quantization config. When given, each child wrapper
            receives ``qcfg.child(<child name>)``; otherwise it receives
            ``None``.
        fp_name: Optional name of the wrapped module. Child wrappers are
            named ``"<fp_name>.<child name>"``. When ``fp_name`` is ``None``
            the children receive ``None`` as well, rather than a name built
            from the literal string ``"None"``.
    """

    def __init__(
        self,
        mlp_fp: nn.Module,
        *,
        qcfg: Optional[QuantConfig] = None,
        fp_name: Optional[str] = None,
    ):
        super().__init__(qcfg, fp_name=fp_name)

        def child_name(suffix: str) -> Optional[str]:
            # Only build a dotted name when the parent actually has one;
            # f"{None}.x" would otherwise produce the bogus "None.x".
            return f"{fp_name}.{suffix}" if fp_name is not None else None

        # ----- child configs (hierarchical override) -------------------
        gate_cfg = qcfg.child("gate_proj") if qcfg else None
        up_cfg = qcfg.child("up_proj") if qcfg else None
        down_cfg = qcfg.child("down_proj") if qcfg else None
        act_cfg = qcfg.child("act_fn") if qcfg else None

        # ----- wrap three Linear layers -------------------------------
        # The asserts are kept explicit (not folded into a loop) so the
        # attribute types stay visible right before each use.
        assert hasattr(mlp_fp, "gate_proj") and isinstance(
            mlp_fp.gate_proj, torch.nn.Module
        )
        assert hasattr(mlp_fp, "up_proj") and isinstance(
            mlp_fp.up_proj, torch.nn.Module
        )
        assert hasattr(mlp_fp, "down_proj") and isinstance(
            mlp_fp.down_proj, torch.nn.Module
        )
        self.gate_proj = PTQWrapper(
            mlp_fp.gate_proj, qcfg=gate_cfg, fp_name=child_name("gate_proj")
        )
        self.up_proj = PTQWrapper(
            mlp_fp.up_proj, qcfg=up_cfg, fp_name=child_name("up_proj")
        )
        self.down_proj = PTQWrapper(
            mlp_fp.down_proj, qcfg=down_cfg, fp_name=child_name("down_proj")
        )

        # ----- activation ---------------------------------------------
        assert hasattr(mlp_fp, "act_fn") and isinstance(mlp_fp.act_fn, torch.nn.Module)
        self.act_fn = PTQWrapper(
            mlp_fp.act_fn, qcfg=act_cfg, fp_name=child_name("act_fn")
        )

        # ----- local observers ----------------------------------------
        self.act_in_obs = self._make_obs("act_in")
        self.mul_obs = self._make_obs("mul")

    def forward(self, x: torch.Tensor):
        """Compute ``down_proj(act_fn(gate_proj(x)) * up_proj(x))``.

        The input and the element-wise product are each passed through
        ``self._fq`` with their local observer before being used.
        """
        # 1) quantize input once; both projections share the same x_q
        x_q = self._fq(x, self.act_in_obs)

        # 2) parallel projections
        g = self.gate_proj(x_q)
        u = self.up_proj(x_q)

        # 3) activation on gate
        a = self.act_fn(g)

        # 4) element-wise product
        h = self._fq(a * u, self.mul_obs)

        # 5) final projection
        return self.down_proj(h)

    def _all_observers(self):
        """Yield the two local observers, then every child's observers."""
        # local first
        yield self.act_in_obs
        yield self.mul_obs
        # recurse into the wrapped children
        for m in (self.gate_proj, self.up_proj, self.down_proj, self.act_fn):
            yield from m._all_observers()
@@ -28,6 +28,7 @@ _CORE_MODULES = (
28
28
  "tico.experimental.quantization.ptq.wrappers.nn.quant_layernorm",
29
29
  "tico.experimental.quantization.ptq.wrappers.nn.quant_linear",
30
30
  "tico.experimental.quantization.ptq.wrappers.nn.quant_silu",
31
+ "tico.experimental.quantization.ptq.wrappers.llama.quant_mlp",
31
32
  # add future core wrappers here
32
33
  )
33
34
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tico
3
- Version: 0.1.0.dev250826
3
+ Version: 0.1.0.dev250827
4
4
  Summary: Convert exported Torch module to circle
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -1,4 +1,4 @@
1
- tico/__init__.py,sha256=M4dQ4CTD_7xsO5DjUx76t4A5o1Q_2NqGlMe0fjkGDxQ,1883
1
+ tico/__init__.py,sha256=zW-Qvhbt2u7nt1XfXlgpi6kJrCNuyIJ3PddICgrWpZI,1883
2
2
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
3
3
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
4
4
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -63,6 +63,7 @@ tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXO
63
63
  tico/experimental/quantization/ptq/quant_config.py,sha256=nm7570Y1X2mOT_8s27ilWid04otor6cVTi9GwgAEaKc,4300
64
64
  tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
65
65
  tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
66
+ tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=MktXLIxz2fUAA7sDG_wN6B8eUi71jOgAaMfhPj9u9XM,3736
66
67
  tico/experimental/quantization/ptq/observers/__init__.py,sha256=WF2MvL9M_jl-B1FqcY9zic34NOCRp17HkRYv-TMxMr4,613
67
68
  tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
68
69
  tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
@@ -76,7 +77,9 @@ tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TIm
76
77
  tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
77
78
  tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=LhEoobfvto6zKrBOKL4gmxfFFc31jHzyQV_zfps-iQM,3604
78
79
  tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=vkcDos_knGSS29rIZuEIWkAJLHrENbGz8nCH2-iara8,5969
79
- tico/experimental/quantization/ptq/wrappers/registry.py,sha256=562nKSlp9qF-w4-aQeJbx2V_wMGE2FRrjIKUfRwC4Mg,4571
80
+ tico/experimental/quantization/ptq/wrappers/registry.py,sha256=utZY381tuKYyUITptUfx4_0V-jprSjvEXpGtfa4cPnI,4638
81
+ tico/experimental/quantization/ptq/wrappers/llama/__init__.py,sha256=vVdVj7HMjxRPAYEO4DEg1l_7qztMmGCGCe7GbniCzrM,115
82
+ tico/experimental/quantization/ptq/wrappers/llama/quant_mlp.py,sha256=uZMnrX66oZwxhKhcNbLXXeri-WxxRBiZnr15aBXJMm0,3562
80
83
  tico/experimental/quantization/ptq/wrappers/nn/__init__.py,sha256=I9uTt5HfcRoMEDYHpAeATMv2TbCQiX0ZbfUFMzSJ4Qw,336
81
84
  tico/experimental/quantization/ptq/wrappers/nn/quant_layernorm.py,sha256=G5Sgt-tXnzh0Rxyk-2honmZIfEQOZlRfOsoDBdSGmA4,6887
82
85
  tico/experimental/quantization/ptq/wrappers/nn/quant_linear.py,sha256=xW-VEPB7RJoslS3xLVCdhIuMjppknvpkZleRGK4JFVQ,2240
@@ -235,9 +238,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
235
238
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
236
239
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
237
240
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
238
- tico-0.1.0.dev250826.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
239
- tico-0.1.0.dev250826.dist-info/METADATA,sha256=QhtUiHj_YT4ZxsClOx4OaP24kuaLUsLT83x3yl1gRDY,8450
240
- tico-0.1.0.dev250826.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
241
- tico-0.1.0.dev250826.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
242
- tico-0.1.0.dev250826.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
243
- tico-0.1.0.dev250826.dist-info/RECORD,,
241
+ tico-0.1.0.dev250827.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
242
+ tico-0.1.0.dev250827.dist-info/METADATA,sha256=dkNactHh3C0pzY9_mblMYxzAdqWIRbFTre9sBiKfDAo,8450
243
+ tico-0.1.0.dev250827.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
244
+ tico-0.1.0.dev250827.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
245
+ tico-0.1.0.dev250827.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
246
+ tico-0.1.0.dev250827.dist-info/RECORD,,