tico 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tico might be problematic. Click here for more details.

Files changed (114) hide show
  1. tico/__init__.py +1 -1
  2. tico/quantization/__init__.py +6 -0
  3. tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +161 -0
  4. tico/quantization/algorithm/fpi_gptq/quantizer.py +179 -0
  5. tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +24 -3
  6. tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +12 -6
  7. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
  8. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
  9. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
  10. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
  11. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
  12. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
  13. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
  14. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
  15. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
  16. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
  17. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
  18. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
  19. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
  20. tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
  21. tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
  22. tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
  23. tico/quantization/config/fpi_gptq.py +29 -0
  24. tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
  25. tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
  26. tico/{experimental/quantization/ptq/quant_config.py → quantization/config/ptq.py} +18 -10
  27. tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
  28. tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
  29. tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
  30. tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
  31. tico/{experimental/quantization → quantization}/evaluation/utils.py +1 -1
  32. tico/{experimental/quantization → quantization}/public_interface.py +7 -7
  33. tico/{experimental/quantization → quantization}/quantizer.py +1 -1
  34. tico/{experimental/quantization → quantization}/quantizer_registry.py +11 -10
  35. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +8 -19
  36. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +9 -24
  37. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +11 -10
  38. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +10 -12
  39. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +10 -9
  40. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +13 -13
  41. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +14 -35
  42. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
  43. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
  44. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
  45. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
  46. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
  47. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
  48. tico/quantization/wrapq/quantizer.py +179 -0
  49. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
  50. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
  51. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
  52. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +6 -8
  53. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +6 -8
  54. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +6 -8
  55. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +6 -8
  56. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +5 -7
  57. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +5 -7
  58. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +8 -12
  59. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +5 -7
  60. tico/quantization/wrapq/wrappers/nn/__init__.py +1 -0
  61. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +6 -7
  62. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +7 -8
  63. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +8 -9
  64. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +4 -6
  65. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +55 -17
  66. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +10 -9
  67. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +17 -16
  68. tico/utils/convert.py +9 -14
  69. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/METADATA +48 -2
  70. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/RECORD +113 -108
  71. tico/experimental/quantization/__init__.py +0 -6
  72. /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
  73. /tico/{experimental/quantization/algorithm/gptq → quantization/algorithm/fpi_gptq}/__init__.py +0 -0
  74. /tico/{experimental/quantization/algorithm/pt2e → quantization/algorithm/gptq}/__init__.py +0 -0
  75. /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
  76. /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
  77. /tico/{experimental/quantization/algorithm/pt2e/annotation → quantization/algorithm/pt2e}/__init__.py +0 -0
  78. /tico/{experimental/quantization/algorithm/pt2e/transformation → quantization/algorithm/pt2e/annotation}/__init__.py +0 -0
  79. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
  80. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
  81. /tico/{experimental/quantization/algorithm/smoothquant → quantization/algorithm/pt2e/transformation}/__init__.py +0 -0
  82. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
  83. /tico/{experimental/quantization/config → quantization/algorithm/smoothquant}/__init__.py +0 -0
  84. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
  85. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
  86. /tico/{experimental/quantization/evaluation → quantization/config}/__init__.py +0 -0
  87. /tico/{experimental/quantization → quantization}/config/base.py +0 -0
  88. /tico/{experimental/quantization/evaluation/executor → quantization/evaluation}/__init__.py +0 -0
  89. /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
  90. /tico/{experimental/quantization/passes → quantization/evaluation/executor}/__init__.py +0 -0
  91. /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
  92. /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
  93. /tico/{experimental/quantization/ptq → quantization/passes}/__init__.py +0 -0
  94. /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
  95. /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  96. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
  97. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
  98. /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
  99. /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
  100. /tico/{experimental/quantization/ptq/examples → quantization/wrapq}/__init__.py +0 -0
  101. /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
  102. /tico/{experimental/quantization/ptq/observers → quantization/wrapq/examples}/__init__.py +0 -0
  103. /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
  104. /tico/{experimental/quantization/ptq/utils → quantization/wrapq/observers}/__init__.py +0 -0
  105. /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
  106. /tico/{experimental/quantization/ptq/wrappers → quantization/wrapq/utils}/__init__.py +0 -0
  107. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
  108. /tico/{experimental/quantization/ptq/wrappers/llama → quantization/wrapq/wrappers}/__init__.py +0 -0
  109. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
  110. /tico/{experimental/quantization/ptq/wrappers/nn → quantization/wrapq/wrappers/llama}/__init__.py +0 -0
  111. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/LICENSE +0 -0
  112. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/WHEEL +0 -0
  113. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/entry_points.txt +0 -0
  114. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/top_level.txt +0 -0
@@ -17,13 +17,12 @@ from typing import Optional
17
17
  import torch.nn as nn
18
18
  import torch.nn.functional as F
19
19
 
20
- from tico.experimental.quantization.ptq.mode import Mode
21
- from tico.experimental.quantization.ptq.qscheme import QScheme
22
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
23
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
24
- QuantModuleBase,
25
- )
26
- from tico.experimental.quantization.ptq.wrappers.registry import register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+
22
+ from tico.quantization.wrapq.mode import Mode
23
+ from tico.quantization.wrapq.qscheme import QScheme
24
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
25
+ from tico.quantization.wrapq.wrappers.registry import register
27
26
 
28
27
 
29
28
  @register(nn.Linear)
@@ -34,7 +33,7 @@ class QuantLinear(QuantModuleBase):
34
33
  self,
35
34
  fp: nn.Linear,
36
35
  *,
37
- qcfg: Optional[QuantConfig] = None,
36
+ qcfg: Optional[PTQConfig] = None,
38
37
  fp_name: Optional[str] = None
39
38
  ):
40
39
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,18 +17,17 @@ from typing import Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
21
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
- QuantModuleBase,
23
- )
24
- from tico.experimental.quantization.ptq.wrappers.registry import register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
22
+ from tico.quantization.wrapq.wrappers.registry import try_register
25
23
 
26
24
 
27
- @register(nn.SiLU)
25
+ @try_register("torch.nn.SiLU", "transformers.activations.SiLUActivation")
28
26
  class QuantSiLU(QuantModuleBase):
29
27
  """
30
- QuantSiLU — drop-in replacement for nn.SiLU that quantizes
31
- both intermediate tensors:
28
+ QuantSiLU — drop-in quantized implementation of the SiLU operation.
29
+
30
+ This module quantizes both intermediate tensors:
32
31
  • s = sigmoid(x) (logistic)
33
32
  • y = x * s (mul)
34
33
  """
@@ -37,7 +36,7 @@ class QuantSiLU(QuantModuleBase):
37
36
  self,
38
37
  fp: nn.SiLU,
39
38
  *,
40
- qcfg: Optional[QuantConfig] = None,
39
+ qcfg: Optional[PTQConfig] = None,
41
40
  fp_name: Optional[str] = None
42
41
  ):
43
42
  super().__init__(qcfg, fp_name=fp_name)
@@ -16,11 +16,9 @@ from typing import Optional
16
16
 
17
17
  import torch
18
18
 
19
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
21
- QuantModuleBase,
22
- )
23
- from tico.experimental.quantization.ptq.wrappers.registry import lookup
19
+ from tico.quantization.config.ptq import PTQConfig
20
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
21
+ from tico.quantization.wrapq.wrappers.registry import lookup
24
22
 
25
23
 
26
24
  class PTQWrapper(QuantModuleBase):
@@ -34,7 +32,7 @@ class PTQWrapper(QuantModuleBase):
34
32
  def __init__(
35
33
  self,
36
34
  module: torch.nn.Module,
37
- qcfg: Optional[QuantConfig] = None,
35
+ qcfg: Optional[PTQConfig] = None,
38
36
  *,
39
37
  fp_name: Optional[str] = None,
40
38
  ):
@@ -12,16 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Callable, Optional
15
+ from typing import Any, Optional
16
16
 
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
21
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
- QuantModuleBase,
23
- )
24
- from tico.experimental.quantization.ptq.wrappers.registry import register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
22
+ from tico.quantization.wrapq.wrappers.registry import register
25
23
 
26
24
 
27
25
  class QuantElementwise(QuantModuleBase):
@@ -33,7 +31,7 @@ class QuantElementwise(QuantModuleBase):
33
31
  """
34
32
 
35
33
  # subclass must set this
36
- FUNC: Callable[[torch.Tensor], torch.Tensor] | None = None
34
+ FUNC: Any = None
37
35
 
38
36
  def __init_subclass__(cls, **kwargs):
39
37
  super().__init_subclass__(**kwargs)
@@ -48,7 +46,7 @@ class QuantElementwise(QuantModuleBase):
48
46
  self,
49
47
  fp_module: nn.Module,
50
48
  *,
51
- qcfg: Optional[QuantConfig] = None,
49
+ qcfg: Optional[PTQConfig] = None,
52
50
  fp_name: Optional[str] = None,
53
51
  ):
54
52
  super().__init__(qcfg, fp_name=fp_name)
@@ -70,7 +68,7 @@ class QuantElementwise(QuantModuleBase):
70
68
 
71
69
 
72
70
  """
73
- Why `FUNC` is a `staticmethod`
71
+ Q1) Why `FUNC` is a `staticmethod`
74
72
 
75
73
  - Prevents automatic binding: calling `self.FUNC(x)` will not inject `self`,
76
74
  so the callable keeps the expected signature `Tensor -> Tensor`
@@ -87,27 +85,67 @@ Why `FUNC` is a `staticmethod`
87
85
  than an `nn.Module` instance that would appear in the module tree.
88
86
 
89
87
  - Small perf/alloc win: no bound-method objects are created on each call.
88
+
89
+ Q2) Why we define small Python wrappers (_relu, _tanh, etc.)
90
+
91
+ - torch.relu / torch.tanh / torch.sigmoid are CPython built-ins.
92
+ Their type is `builtin_function_or_method`, not a Python `FunctionType`.
93
+ This causes `torch.export` (and FX tracing) to fail with:
94
+ "expected FunctionType, found builtin_function_or_method".
95
+
96
+ - By defining a thin Python wrapper (e.g., `def _tanh(x): return torch.tanh(x)`),
97
+ we convert it into a normal Python function object (`FunctionType`),
98
+ which satisfies export/tracing requirements.
99
+
100
+ - Functionally, this adds negligible overhead and preserves semantics,
101
+ but makes the callable introspectable (has __code__, __name__, etc.)
102
+ and compatible with TorchDynamo / FX graph capture.
103
+
104
+ - It also keeps FUNC pure and stateless, ensuring the elementwise op
105
+ is represented as `call_function(_tanh)` in the traced graph
106
+ rather than a bound `call_method` or module attribute access.
90
107
  """
91
108
 
92
- # Sigmoid
109
+
110
+ def _relu(x: torch.Tensor) -> torch.Tensor:
111
+ return torch.relu(x)
112
+
113
+
114
+ def _tanh(x: torch.Tensor) -> torch.Tensor:
115
+ return torch.tanh(x)
116
+
117
+
118
+ def _sigmoid(x: torch.Tensor) -> torch.Tensor:
119
+ return torch.sigmoid(x)
120
+
121
+
122
+ def _gelu(x: torch.Tensor) -> torch.Tensor:
123
+ return torch.nn.functional.gelu(x)
124
+
125
+
93
126
  @register(nn.Sigmoid)
94
127
  class QuantSigmoid(QuantElementwise):
95
- FUNC = staticmethod(torch.sigmoid)
128
+ @staticmethod
129
+ def FUNC(x: torch.Tensor) -> torch.Tensor:
130
+ return _sigmoid(x)
96
131
 
97
132
 
98
- # Tanh
99
133
  @register(nn.Tanh)
100
134
  class QuantTanh(QuantElementwise):
101
- FUNC = staticmethod(torch.tanh)
135
+ @staticmethod
136
+ def FUNC(x: torch.Tensor) -> torch.Tensor:
137
+ return _tanh(x)
102
138
 
103
139
 
104
- # ReLU
105
140
  @register(nn.ReLU)
106
141
  class QuantReLU(QuantElementwise):
107
- FUNC = staticmethod(torch.relu)
142
+ @staticmethod
143
+ def FUNC(x: torch.Tensor) -> torch.Tensor:
144
+ return _relu(x)
108
145
 
109
146
 
110
- # GELU (approximate)
111
147
  @register(nn.GELU)
112
148
  class QuantGELU(QuantElementwise):
113
- FUNC = staticmethod(torch.nn.functional.gelu)
149
+ @staticmethod
150
+ def FUNC(x: torch.Tensor) -> torch.Tensor:
151
+ return _gelu(x)
@@ -17,9 +17,10 @@ from typing import Iterable, Optional, Tuple
17
17
 
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.mode import Mode
21
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
22
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.quantization.config.ptq import PTQConfig
21
+
22
+ from tico.quantization.wrapq.mode import Mode
23
+ from tico.quantization.wrapq.observers.base import ObserverBase
23
24
 
24
25
 
25
26
  class QuantModuleBase(nn.Module, ABC):
@@ -29,7 +30,7 @@ class QuantModuleBase(nn.Module, ABC):
29
30
  Responsibilities
30
31
  ----------------
31
32
  • Own *one* Mode enum (`NO_QUANT / CALIB / QUANT`)
32
- • Own a QuantConfig describing default / per-observer dtypes
33
+ • Own a PTQConfig describing default / per-observer dtypes
33
34
  • Expose a canonical lifecycle:
34
35
  enable_calibration()
35
36
  freeze_qparams()
@@ -38,10 +39,10 @@ class QuantModuleBase(nn.Module, ABC):
38
39
  """
39
40
 
40
41
  def __init__(
41
- self, qcfg: Optional[QuantConfig] = None, *, fp_name: Optional[str] = None
42
+ self, qcfg: Optional[PTQConfig] = None, *, fp_name: Optional[str] = None
42
43
  ) -> None:
43
44
  super().__init__()
44
- self.qcfg = qcfg or QuantConfig()
45
+ self.qcfg = qcfg or PTQConfig()
45
46
  self._mode: Mode = Mode.NO_QUANT # default state
46
47
  self.fp_name = fp_name
47
48
 
@@ -118,9 +119,9 @@ class QuantModuleBase(nn.Module, ABC):
118
119
  Instantiate an observer named *name*.
119
120
 
120
121
  Precedence (3-tier) for keys:
121
- • observer: user > wrapper-default > QuantConfig.default_observer
122
- • dtype: user > wrapper-default > QuantConfig.default_dtype
123
- • qscheme: user > wrapper-default > QuantConfig.default_qscheme
122
+ • observer: user > wrapper-default > PTQConfig.default_observer
123
+ • dtype: user > wrapper-default > PTQConfig.default_dtype
124
+ • qscheme: user > wrapper-default > PTQConfig.default_qscheme
124
125
 
125
126
  Other kwargs (e.g., qscheme, channel_axis, etc.) remain:
126
127
  user override > wrapper-default
@@ -17,26 +17,27 @@ from typing import Callable, Dict, Type
17
17
 
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
21
- QuantModuleBase,
22
- )
20
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
23
21
 
24
22
  _WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
25
23
  _IMPORT_ONCE = False
26
24
  _CORE_MODULES = (
27
- "tico.experimental.quantization.ptq.wrappers.quant_elementwise",
28
- "tico.experimental.quantization.ptq.wrappers.nn.quant_layernorm",
29
- "tico.experimental.quantization.ptq.wrappers.nn.quant_linear",
30
- "tico.experimental.quantization.ptq.wrappers.nn.quant_silu",
31
- # llama
32
- "tico.experimental.quantization.ptq.wrappers.llama.quant_attn",
33
- "tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer",
34
- "tico.experimental.quantization.ptq.wrappers.llama.quant_mlp",
35
- # fairseq
36
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_decoder_layer",
37
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder",
38
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder_layer",
39
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha",
25
+ "tico.quantization.wrapq.wrappers.quant_elementwise",
26
+ ## nn ##
27
+ "tico.quantization.wrapq.wrappers.nn.quant_layernorm",
28
+ "tico.quantization.wrapq.wrappers.nn.quant_linear",
29
+ # This includes not only `nn.SiLU` but also `SiLUActivation` from transformers
30
+ # as they are the same operation.
31
+ "tico.quantization.wrapq.wrappers.nn.quant_silu",
32
+ ## llama ##
33
+ "tico.quantization.wrapq.wrappers.llama.quant_attn",
34
+ "tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
35
+ "tico.quantization.wrapq.wrappers.llama.quant_mlp",
36
+ ## fairseq ##
37
+ "tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
38
+ "tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
39
+ "tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
40
+ "tico.quantization.wrapq.wrappers.fairseq.quant_mha",
40
41
  # add future core wrappers here
41
42
  )
42
43
 
tico/utils/convert.py CHANGED
@@ -20,20 +20,6 @@ import torch
20
20
  from torch.export import export, ExportedProgram
21
21
 
22
22
  from tico.config import CompileConfigBase, get_default_config
23
- from tico.experimental.quantization.passes.fold_quant_ops import FoldQuantOps
24
- from tico.experimental.quantization.passes.insert_quantize_on_dtype_mismatch import (
25
- InsertQuantizeOnDtypeMismatch,
26
- )
27
- from tico.experimental.quantization.passes.propagate_qparam_backward import (
28
- PropagateQParamBackward,
29
- )
30
- from tico.experimental.quantization.passes.propagate_qparam_forward import (
31
- PropagateQParamForward,
32
- )
33
- from tico.experimental.quantization.passes.quantize_bias import QuantizeBias
34
- from tico.experimental.quantization.passes.remove_weight_dequant_op import (
35
- RemoveWeightDequantOp,
36
- )
37
23
  from tico.passes.cast_aten_where_arg_type import CastATenWhereArgType
38
24
  from tico.passes.cast_clamp_mixed_type_args import CastClampMixedTypeArgs
39
25
  from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
@@ -74,6 +60,14 @@ from tico.passes.remove_redundant_slice import RemoveRedundantSlice
74
60
  from tico.passes.remove_redundant_to_copy import RemoveRedundantToCopy
75
61
  from tico.passes.restore_linear import RestoreLinear
76
62
  from tico.passes.segment_index_select import SegmentIndexSelectConst
63
+ from tico.quantization.passes.fold_quant_ops import FoldQuantOps
64
+ from tico.quantization.passes.insert_quantize_on_dtype_mismatch import (
65
+ InsertQuantizeOnDtypeMismatch,
66
+ )
67
+ from tico.quantization.passes.propagate_qparam_backward import PropagateQParamBackward
68
+ from tico.quantization.passes.propagate_qparam_forward import PropagateQParamForward
69
+ from tico.quantization.passes.quantize_bias import QuantizeBias
70
+ from tico.quantization.passes.remove_weight_dequant_op import RemoveWeightDequantOp
77
71
  from tico.serialize.circle_serializer import build_circle
78
72
  from tico.serialize.operators.node_visitor import get_support_targets
79
73
  from tico.utils import logging
@@ -143,6 +137,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
143
137
  or torch.__version__.startswith("2.7")
144
138
  or torch.__version__.startswith("2.8")
145
139
  or torch.__version__.startswith("2.9")
140
+ or torch.__version__.startswith("2.10")
146
141
  ):
147
142
  return run_decompositions(exported_program)
148
143
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tico
3
- Version: 0.1.0.dev250924
3
+ Version: 0.1.0.dev251109
4
4
  Summary: Convert exported Torch module to circle
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -30,6 +30,7 @@ designed for optimized on-device neural network inference.
30
30
  - [From torch module](#from-torch-module)
31
31
  - [From .pt2](#from-pt2)
32
32
  - [Running circle models directly in Python](#running-circle-models-directly-in-python)
33
+ - [Quantization](#quantization)
33
34
 
34
35
  ### For Developers
35
36
 
@@ -68,7 +69,7 @@ This will generate `build` and `dist` directories in the root directory.
68
69
  **Available options**
69
70
  - `--dist` To install the package from .whl (without this option, _TICO_ is installed in an editable mode)
70
71
  - `--torch_ver <torch version>` To install a specific torch version (default: 2.6).
71
- - Available <torch version>: 2.5, 2.6, 2.7, nightly
72
+ - Available <torch version>: 2.5, 2.6, 2.7, 2.8, nightly
72
73
 
73
74
  4. Now you can convert a torch module to a `.circle`.
74
75
 
@@ -188,6 +189,48 @@ circle_model(*example_inputs)
188
189
  # numpy.ndarray([2., 2., 2., 2.], dtype=float32)
189
190
  ```
190
191
 
192
+ ### Quantization
193
+
194
+ The `tico.quantization` module provides a unified and modular interface for quantizing
195
+ large language models (LLMs) and other neural networks.
196
+
197
+ It introduces a simple two-step workflow — **prepare** and **convert** — that
198
+ abstracts the details of different quantization algorithms.
199
+
200
+ #### Basic Usage
201
+
202
+ ```python
203
+ from tico.quantization import prepare, convert
204
+ from tico.quantization.config.gptq import GPTQConfig
205
+ import torch
206
+ import torch.nn as nn
207
+
208
+ class LinearModel(nn.Module):
209
+ def __init__(self):
210
+ super().__init__()
211
+ self.linear = nn.Linear(8, 8)
212
+
213
+ def forward(self, x):
214
+ return self.linear(x)
215
+
216
+ model = LinearModel().eval()
217
+
218
+ # 1. Prepare for quantization
219
+ quant_config = GPTQConfig()
220
+ prepared_model = prepare(model, quant_config)
221
+
222
+ # 2. Calibration (`dataset` is a user-provided iterable of example inputs)
223
+ for d in dataset:
224
+ prepared_model(d)
225
+
226
+ # 3. Apply GPTQ
227
+ quantized_model = convert(prepared_model, quant_config)
228
+ ```
229
+
230
+ For detailed documentation, design notes, and contributing guidelines,
231
+ see [tico/quantization/README.md](./tico/quantization/README.md).
232
+
233
+
191
234
  ## For Developers
192
235
 
193
236
  ### Testing & Code Formatting
@@ -276,6 +319,9 @@ If you want to test them locally, you can do so by navigating to each model dire
276
319
  $ pip install -r test/modules/model/<model_name>/requirements.txt
277
320
  # Run test for a single model
278
321
  $ ./ccex test -m <model_name>
322
+ # Run models whose names contain "Llama" (e.g., Llama, LlamaDecoderLayer, LlamaWithGQA, etc.)
323
+ # Note that the wildcard (*) pattern must be enclosed in quotes
324
+ $ ./ccex test -m "Llama*"
279
325
  ```
280
326
 
281
327
  For example, to run a single model