tico 0.1.0.dev250924__py3-none-any.whl → 0.1.0.dev251109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114)
  1. tico/__init__.py +1 -1
  2. tico/quantization/__init__.py +6 -0
  3. tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +161 -0
  4. tico/quantization/algorithm/fpi_gptq/quantizer.py +179 -0
  5. tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +24 -3
  6. tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +12 -6
  7. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
  8. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
  9. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
  10. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
  11. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
  12. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
  13. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
  14. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
  15. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
  16. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
  17. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
  18. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
  19. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
  20. tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
  21. tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
  22. tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
  23. tico/quantization/config/fpi_gptq.py +29 -0
  24. tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
  25. tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
  26. tico/{experimental/quantization/ptq/quant_config.py → quantization/config/ptq.py} +18 -10
  27. tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
  28. tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
  29. tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
  30. tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
  31. tico/{experimental/quantization → quantization}/evaluation/utils.py +1 -1
  32. tico/{experimental/quantization → quantization}/public_interface.py +7 -7
  33. tico/{experimental/quantization → quantization}/quantizer.py +1 -1
  34. tico/{experimental/quantization → quantization}/quantizer_registry.py +11 -10
  35. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +8 -19
  36. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +9 -24
  37. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +11 -10
  38. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +10 -12
  39. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +10 -9
  40. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +13 -13
  41. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +14 -35
  42. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
  43. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
  44. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
  45. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
  46. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
  47. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
  48. tico/quantization/wrapq/quantizer.py +179 -0
  49. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
  50. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
  51. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
  52. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +6 -8
  53. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +6 -8
  54. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +6 -8
  55. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +6 -8
  56. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +5 -7
  57. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +5 -7
  58. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +8 -12
  59. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +5 -7
  60. tico/quantization/wrapq/wrappers/nn/__init__.py +1 -0
  61. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +6 -7
  62. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +7 -8
  63. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +8 -9
  64. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +4 -6
  65. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +55 -17
  66. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +10 -9
  67. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +17 -16
  68. tico/utils/convert.py +9 -14
  69. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/METADATA +48 -2
  70. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/RECORD +113 -108
  71. tico/experimental/quantization/__init__.py +0 -6
  72. /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
  73. /tico/{experimental/quantization/algorithm/gptq → quantization/algorithm/fpi_gptq}/__init__.py +0 -0
  74. /tico/{experimental/quantization/algorithm/pt2e → quantization/algorithm/gptq}/__init__.py +0 -0
  75. /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
  76. /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
  77. /tico/{experimental/quantization/algorithm/pt2e/annotation → quantization/algorithm/pt2e}/__init__.py +0 -0
  78. /tico/{experimental/quantization/algorithm/pt2e/transformation → quantization/algorithm/pt2e/annotation}/__init__.py +0 -0
  79. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
  80. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
  81. /tico/{experimental/quantization/algorithm/smoothquant → quantization/algorithm/pt2e/transformation}/__init__.py +0 -0
  82. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
  83. /tico/{experimental/quantization/config → quantization/algorithm/smoothquant}/__init__.py +0 -0
  84. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
  85. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
  86. /tico/{experimental/quantization/evaluation → quantization/config}/__init__.py +0 -0
  87. /tico/{experimental/quantization → quantization}/config/base.py +0 -0
  88. /tico/{experimental/quantization/evaluation/executor → quantization/evaluation}/__init__.py +0 -0
  89. /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
  90. /tico/{experimental/quantization/passes → quantization/evaluation/executor}/__init__.py +0 -0
  91. /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
  92. /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
  93. /tico/{experimental/quantization/ptq → quantization/passes}/__init__.py +0 -0
  94. /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
  95. /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  96. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
  97. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
  98. /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
  99. /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
  100. /tico/{experimental/quantization/ptq/examples → quantization/wrapq}/__init__.py +0 -0
  101. /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
  102. /tico/{experimental/quantization/ptq/observers → quantization/wrapq/examples}/__init__.py +0 -0
  103. /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
  104. /tico/{experimental/quantization/ptq/utils → quantization/wrapq/observers}/__init__.py +0 -0
  105. /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
  106. /tico/{experimental/quantization/ptq/wrappers → quantization/wrapq/utils}/__init__.py +0 -0
  107. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
  108. /tico/{experimental/quantization/ptq/wrappers/llama → quantization/wrapq/wrappers}/__init__.py +0 -0
  109. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
  110. /tico/{experimental/quantization/ptq/wrappers/nn → quantization/wrapq/wrappers/llama}/__init__.py +0 -0
  111. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/LICENSE +0 -0
  112. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/WHEEL +0 -0
  113. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/entry_points.txt +0 -0
  114. {tico-0.1.0.dev250924.dist-info → tico-0.1.0.dev251109.dist-info}/top_level.txt +0 -0
@@ -18,13 +18,13 @@ import torch
 
 from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
 
-from tico.experimental.quantization.algorithm.pt2e.annotation.annotator import (
+from tico.quantization.algorithm.pt2e.annotation.annotator import (
     get_asymmetric_quantization_config,
     PT2EAnnotator,
 )
-from tico.experimental.quantization.config.pt2e import PT2EConfig
-from tico.experimental.quantization.quantizer import BaseQuantizer
-from tico.experimental.quantization.quantizer_registry import register_quantizer
+from tico.quantization.config.pt2e import PT2EConfig
+from tico.quantization.quantizer import BaseQuantizer
+from tico.quantization.quantizer_registry import register_quantizer
 
 
 @register_quantizer(PT2EConfig)
@@ -20,9 +20,7 @@ import torch
 from torch.ao.quantization.quantizer import QuantizationSpec
 from torch.ao.quantization.quantizer.utils import _get_module_name_filter
 
-from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
-    QuantizationConfig,
-)
+from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
 
 
 def get_module_type_filter(tp: Callable):
@@ -16,16 +16,12 @@ from typing import Any, Dict, Optional
 
 import torch
 
-from tico.experimental.quantization.algorithm.smoothquant.observer import (
-    ChannelwiseMaxActsObserver,
-)
-
-from tico.experimental.quantization.algorithm.smoothquant.smooth_quant import (
-    apply_smoothing,
-)
-from tico.experimental.quantization.config.smoothquant import SmoothQuantConfig
-from tico.experimental.quantization.quantizer import BaseQuantizer
-from tico.experimental.quantization.quantizer_registry import register_quantizer
+from tico.quantization.algorithm.smoothquant.observer import ChannelwiseMaxActsObserver
+
+from tico.quantization.algorithm.smoothquant.smooth_quant import apply_smoothing
+from tico.quantization.config.smoothquant import SmoothQuantConfig
+from tico.quantization.quantizer import BaseQuantizer
+from tico.quantization.quantizer_registry import register_quantizer
 
 
 @register_quantizer(SmoothQuantConfig)
@@ -0,0 +1,29 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from tico.quantization.config.gptq import GPTQConfig
+
+
+class FPIGPTQConfig(GPTQConfig):
+    """
+    Configuration for FPIGPTQ (Fixed Point Iteration).
+    """
+
+    def __init__(self, verbose: bool = False, show_progress: bool = True):
+        self.verbose = verbose
+        self.show_progress = show_progress
+
+    @property
+    def name(self) -> str:
+        return "fpi_gptq"
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from tico.experimental.quantization.config.base import BaseConfig
+from tico.quantization.config.base import BaseConfig
 
 
 class GPTQConfig(BaseConfig):
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from tico.experimental.quantization.config.base import BaseConfig
+from tico.quantization.config.base import BaseConfig
 
 
 class PT2EConfig(BaseConfig):
@@ -15,14 +15,15 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, Mapping, Type
 
-from tico.experimental.quantization.ptq.dtypes import DType
-from tico.experimental.quantization.ptq.observers.base import ObserverBase
-from tico.experimental.quantization.ptq.observers.minmax import MinMaxObserver
-from tico.experimental.quantization.ptq.qscheme import QScheme
+from tico.quantization.config.base import BaseConfig
+from tico.quantization.wrapq.dtypes import DType
+from tico.quantization.wrapq.observers.base import ObserverBase
+from tico.quantization.wrapq.observers.minmax import MinMaxObserver
+from tico.quantization.wrapq.qscheme import QScheme
 
 
 @dataclass
-class QuantConfig:
+class PTQConfig(BaseConfig):
     """
     One object describes the quantization preferences for a single wrapper
     and its descendants.
@@ -54,9 +55,9 @@ class QuantConfig:
     Example
     -------
     ```python
-    from ptq.observers import PercentileObserver
+    from wrapq.observers import PercentileObserver
 
-    cfg = QuantConfig(
+    cfg = PTQConfig(
         default_dtype = DType.uint(8),
         default_qscheme = QScheme.PER_TENSOR_SYMM,  # <- global scheme
         default_observer = PercentileObserver,      # <- global algorithm
@@ -74,6 +75,12 @@ class QuantConfig:
     default_observer: Type[ObserverBase] = MinMaxObserver
     default_qscheme: QScheme = QScheme.PER_TENSOR_ASYMM
     overrides: Mapping[str, Mapping[str, Any]] = field(default_factory=dict)
+    # If True, any module that cannot be wrapped will raise.
+    strict_wrap: bool = True
+
+    @property
+    def name(self) -> str:
+        return "ptq"
 
     def get_kwargs(self, obs_name: str) -> Dict[str, Any]:
         """
@@ -87,7 +94,7 @@ class QuantConfig:
         """
         return dict(self.overrides.get(obs_name, {}))
 
-    def child(self, scope: str) -> "QuantConfig":
+    def child(self, scope: str) -> "PTQConfig":
         """
         Produce a *view* for a child wrapper.
 
@@ -100,12 +107,13 @@ class QuantConfig:
         Other scopes remain invisible to the child.
         """
         sub_overrides = self.overrides.get(scope, {})
-        return QuantConfig(
+        return PTQConfig(
             self.default_dtype,
             self.default_observer,
             default_qscheme=self.default_qscheme,
             overrides=sub_overrides,
+            strict_wrap=self.strict_wrap,
         )
 
     def __repr__(self):
-        return f"QuantConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)})"
+        return f"PTQConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)}, strict_wrap={self.strict_wrap})"
@@ -14,7 +14,7 @@
 
 from typing import Dict, Literal, Optional
 
-from tico.experimental.quantization.config.base import BaseConfig
+from tico.quantization.config.base import BaseConfig
 
 
 class SmoothQuantConfig(BaseConfig):
@@ -20,18 +20,12 @@ import torch
 from circle_schema import circle
 from torch.utils import _pytree as pytree
 
-from tico.experimental.quantization.evaluation.backend import BACKEND
-from tico.experimental.quantization.evaluation.executor.backend_executor import (
-    BackendExecutor,
-)
-from tico.experimental.quantization.evaluation.executor.circle_executor import (
-    CircleExecutor,
-)
-from tico.experimental.quantization.evaluation.executor.triv24_executor import (
-    Triv24Executor,
-)
-from tico.experimental.quantization.evaluation.metric import MetricCalculator
-from tico.experimental.quantization.evaluation.utils import (
+from tico.quantization.evaluation.backend import BACKEND
+from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
+from tico.quantization.evaluation.executor.circle_executor import CircleExecutor
+from tico.quantization.evaluation.executor.triv24_executor import Triv24Executor
+from tico.quantization.evaluation.metric import MetricCalculator
+from tico.quantization.evaluation.utils import (
     ensure_list,
     find_invalid_types,
     get_graph_input_output,
@@ -19,9 +19,7 @@ from typing import List
 import numpy as np
 import torch
 
-from tico.experimental.quantization.evaluation.executor.backend_executor import (
-    BackendExecutor,
-)
+from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
 from tico.utils.model import CircleModel
 from tico.utils.utils import run_bash_cmd
 
@@ -20,10 +20,8 @@ import numpy as np
 import torch
 from circle_schema import circle
 
-from tico.experimental.quantization.evaluation.executor.backend_executor import (
-    BackendExecutor,
-)
-from tico.experimental.quantization.evaluation.utils import (
+from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
+from tico.quantization.evaluation.utils import (
     dequantize,
     get_graph_input_output,
     quantize,
@@ -44,7 +44,7 @@ def quantize(
     data = np.array(data)
     # Perfrom quantization
     if not scale:
-        logger.warn("WARNING: scale value is 0. 1e-7 will be used instead.")
+        logger.warning("WARNING: scale value is 0. 1e-7 will be used instead.")
         scale = 1e-7
     rescaled = np.round(data / scale) + zero_point
     # Clamp the values
@@ -17,11 +17,11 @@ from typing import Any, Dict, Optional
 
 import torch
 
-from tico.experimental.quantization.algorithm.gptq.quantizer import GPTQQuantizer
-from tico.experimental.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
-from tico.experimental.quantization.config.base import BaseConfig
-from tico.experimental.quantization.quantizer import BaseQuantizer
-from tico.experimental.quantization.quantizer_registry import get_quantizer
+from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
+from tico.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
+from tico.quantization.config.base import BaseConfig
+from tico.quantization.quantizer import BaseQuantizer
+from tico.quantization.quantizer_registry import get_quantizer
 
 
 QUANTIZER_ATTRIBUTE_NAME = "tico_quantizer"
@@ -32,7 +32,7 @@ def prepare(
     quant_config: BaseConfig,
     args: Optional[Any] = None,
     kwargs: Optional[Dict[str, Any]] = None,
-    inplace: Optional[bool] = False,
+    inplace: Optional[bool] = True,
 ):
     """
     Prepare the model for quantization using the provided configuration.
@@ -68,7 +68,7 @@ def prepare(
     return model
 
 
-def convert(model, inplace: Optional[bool] = False):
+def convert(model, inplace: Optional[bool] = True):
     """
     Convert the prepared model to a quantized model using the provided configuration.
 
@@ -17,7 +17,7 @@ from typing import Any, Dict, Optional
 
 import torch
 
-from tico.experimental.quantization.config.base import BaseConfig
+from tico.quantization.config.base import BaseConfig
 
 
 class BaseQuantizer(ABC):
@@ -15,8 +15,8 @@
 import importlib
 from typing import Dict, Optional, Type, TypeVar
 
-from tico.experimental.quantization.config.base import BaseConfig
-from tico.experimental.quantization.quantizer import BaseQuantizer
+from tico.quantization.config.base import BaseConfig
+from tico.quantization.quantizer import BaseQuantizer
 
 TQ = TypeVar("TQ", bound=BaseQuantizer)
 
@@ -53,14 +53,15 @@ def get_quantizer(cfg: BaseConfig) -> BaseQuantizer:
     # Lazy import by naming convention
     name = getattr(cfg, "name", None)
    if name:
-        try:
-            importlib.import_module(
-                f"tico.experimental.quantization.algorithm.{name}.quantizer"
-            )
-        except Exception as e:
-            raise RuntimeError(
-                f"Failed to import quantizer module for config name='{name}': {e}"
-            )
+        if name == "ptq":
+            importlib.import_module(f"tico.quantization.wrapq.quantizer")
+        else:
+            try:
+                importlib.import_module(f"tico.quantization.algorithm.{name}.quantizer")
+            except Exception as e:
+                raise RuntimeError(
+                    f"Failed to import quantizer module for config name='{name}': {e}"
+                )
 
     qcls = _lookup(cfg)
     if qcls is not None:
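Two behavioral changes meet here: `prepare()` and `convert()` now default to `inplace=True`, and `get_quantizer()` special-cases the `"ptq"` config name so it lazily imports `tico.quantization.wrapq.quantizer` instead of an `algorithm.<name>.quantizer` module. A minimal sketch of the resulting wrapper-based flow, mirroring the updated `quantize_linear.py` example further down; the toy layer and calibration data are placeholders.

```python
# Minimal sketch of the updated public API flow (placeholder layer and data).
import torch
import torch.nn as nn

from tico.quantization import convert, prepare
from tico.quantization.config.ptq import PTQConfig

fc = nn.Linear(16, 32).eval()

# name == "ptq" -> get_quantizer() lazily imports tico.quantization.wrapq.quantizer.
# inplace now defaults to True for both prepare() and convert().
qfc = prepare(fc, PTQConfig())

with torch.no_grad():              # calibration pass: observers record ranges
    for _ in range(16):
        qfc(torch.randn(4, 16))

convert(qfc)                       # freeze (scale, zero-point) pairs
```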
@@ -22,16 +22,15 @@
 
 import argparse
 import sys
-from typing import Optional
 
 import torch
 import tqdm
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.utils.metrics import perplexity
-from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.utils.metrics import perplexity
 
 # Token-budget presets for activation calibration
 TOKENS: dict[str, int] = {
@@ -165,13 +164,8 @@ def main():
     # ---------------------------------------------------------------------
     # 2. Wrap every Transformer layer with PTQWrapper
     # ---------------------------------------------------------------------
-    qcfg = QuantConfig()  # all-uint8 defaults
-
-    wrapped_layers = torch.nn.ModuleList()
-    for idx, layer in enumerate(uint8_model.model.layers):
-        layer_cfg = qcfg.child(f"layer{idx}")
-        wrapped_layers.append(PTQWrapper(layer, qcfg=layer_cfg))
-    uint8_model.model.layers = wrapped_layers
+    qcfg = PTQConfig()  # all-uint8 defaults
+    prepare(uint8_model, qcfg)
 
     # ---------------------------------------------------------------------
     # 3. Single-pass activation calibration
@@ -182,11 +176,7 @@ def main():
     )[:CALIB_TOKENS]
     ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
 
-    # (a) switch every QuantModuleBase to CALIB mode
-    for l in uint8_model.model.layers:
-        l.enable_calibration()
-
-    # (b) run inference to collect ranges
+    # Run inference to collect ranges
     iterator = range(0, ids.size(1) - 1, args.stride)
     if not args.no_tqdm:
         iterator = tqdm.tqdm(iterator, desc="Calibration")
@@ -194,9 +184,8 @@ def main():
     for i in iterator:
         uint8_model(ids[:, i : i + args.stride])
 
-    # (c) freeze (scale, zero-point)
-    for l in uint8_model.model.layers:
-        l.freeze_qparams()
+    # Freeze (scale, zero-point)
+    convert(uint8_model)
 
     # -------------------------------------------------------------------------
     # 4. Evaluate perplexity
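Condensed, the updated `compare_ppl.py` flow above reduces to prepare, a single calibration pass, convert, then perplexity. The sketch below stitches those hunks together; the model name and stride are taken from the examples, but the `perplexity()` signature is not visible in this diff, so that call is only indicated.

```python
# Condensed form of the updated compare_ppl.py flow (sketch, not verbatim).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from tico.quantization import convert, prepare
from tico.quantization.config.ptq import PTQConfig
from tico.quantization.wrapq.utils.metrics import perplexity

model = AutoModelForCausalLM.from_pretrained("Maykeye/TinyLLama-v0").eval()
tokenizer = AutoTokenizer.from_pretrained("Maykeye/TinyLLama-v0")

prepare(model, PTQConfig())                     # wrap layers, all-uint8 defaults

ids = tokenizer("calibration corpus ...", return_tensors="pt").input_ids
with torch.no_grad():                           # single calibration pass
    for i in range(0, ids.size(1) - 1, 512):    # 512 = placeholder stride
        model(ids[:, i : i + 512])

convert(model)                                  # freeze (scale, zero-point)

# perplexity() lives in tico.quantization.wrapq.utils.metrics; its exact
# arguments are not shown in this diff, so this call is illustrative.
# print(perplexity(model, tokenizer))
```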
@@ -38,13 +38,14 @@ import tqdm
 from datasets import load_dataset
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.utils.introspection import (
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.wrapq.utils.introspection import (
     build_fqn_map,
     compare_layer_outputs,
     save_fp_outputs,
 )
-from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
+from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
 
 # Token-budget presets for activation calibration
 TOKENS: dict[str, int] = {
@@ -176,19 +177,8 @@ def main():
     # 2. Wrap every layer with PTQWrapper (UINT-8 activations)
     # -------------------------------------------------------------------------
     print("Wrapping layers with PTQWrapper …")
-    qcfg = QuantConfig()  # default: per-tensor UINT8
-
-    new_layers = torch.nn.ModuleList()
-    for idx, fp_layer in enumerate(model.model.layers):
-        layer_cfg = qcfg.child(f"layer{idx}")
-        q_layer = PTQWrapper(
-            fp_layer,
-            qcfg=layer_cfg,
-            fp_name=m_to_fqn.get(fp_layer),
-        )
-        new_layers.append(q_layer)
-
-    model.model.layers = new_layers  # swap in quant wrappers
+    qcfg = PTQConfig()  # default: per-tensor UINT8
+    prepare(model, qcfg)
 
     # -------------------------------------------------------------------------
     # 3. Activation calibration plus FP-vs-UINT8 diffing
@@ -197,10 +187,6 @@ def main():
     calib_txt = " ".join(dataset["text"])[:CALIB_TOKENS]
     ids = tokenizer(calib_txt, return_tensors="pt").input_ids.to(device)
 
-    # (a) Enable CALIB mode on every QuantModuleBase
-    for l in model.model.layers:
-        l.enable_calibration()
-
     # Save reference FP activations before observers clamp/quantize
     save_handles, act_cache = save_fp_outputs(model)
 
@@ -216,11 +202,10 @@ def main():
     for h in save_handles:
         h.remove()
 
-    # (b) Freeze (scale, zero-point) after calibration
-    for l in model.model.layers:
-        l.freeze_qparams()
+    # Freeze (scale, zero-point) after calibration
+    convert(model)
 
-    # (c) Register diff hooks and measure per-layer deltas
+    # Register diff hooks and measure per-layer deltas
     cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
     # Use same inputs for comparison.
     with torch.no_grad():
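The `debug_quant_outputs.py` example keeps using the introspection helpers, now under `tico.quantization.wrapq.utils.introspection`: capture FP reference outputs while calibrating, then register comparison hooks after `convert()`. A hedged sketch of that pattern follows; only the helper names and call shapes visible in the hunks above are assumed, and the model, input, and hook teardown are placeholders.

```python
# Sketch of the FP-vs-quantized diffing pattern from debug_quant_outputs.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from tico.quantization import convert, prepare
from tico.quantization.config.ptq import PTQConfig
from tico.quantization.wrapq.utils.introspection import (
    compare_layer_outputs,
    save_fp_outputs,
)

model = AutoModelForCausalLM.from_pretrained("Maykeye/TinyLLama-v0").eval()
tokenizer = AutoTokenizer.from_pretrained("Maykeye/TinyLLama-v0")
ids = tokenizer("calibration text", return_tensors="pt").input_ids

prepare(model, PTQConfig())

# Capture reference FP outputs while observers only record activation ranges.
save_handles, act_cache = save_fp_outputs(model)
with torch.no_grad():
    model(ids)
for h in save_handles:
    h.remove()

convert(model)  # freeze qparams

# Re-run the same inputs and report per-layer deltas against the FP cache.
cmp_handles = compare_layer_outputs(model, act_cache, metrics=["diff", "peir"])
with torch.no_grad():
    model(ids)
for h in cmp_handles:  # hook cleanup; assumed, not shown in the diff
    h.remove()
```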
@@ -29,13 +29,15 @@ import pathlib
 import torch
 import torch.nn as nn
 
-from tico.experimental.quantization.evaluation.metric import compute_peir
-from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.wrappers.nn.quant_linear import QuantLinear
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.evaluation.metric import compute_peir
+from tico.quantization.evaluation.utils import plot_two_outputs
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.wrappers.nn.quant_linear import QuantLinear
 from tico.utils.utils import SuppressWarning
 
+
 # -------------------------------------------------------------------------
 # 0. Define a toy model (1 Linear layer only)
 # -------------------------------------------------------------------------
@@ -60,20 +62,19 @@ fp32_layer = model.fc
 # -------------------------------------------------------------------------
 # 1. Replace the Linear with QuantLinear wrapper
 # -------------------------------------------------------------------------
-model.fc = QuantLinear(fp32_layer)  # type: ignore[assignment]
-# model.fc = PTQWrapper(fp32_layer) (Wrapping helper class)
+model.fc = prepare(fp32_layer, PTQConfig())  # type: ignore[assignment]
 qlayer = model.fc  # alias for brevity
 
 # -------------------------------------------------------------------------
 # 2. Single-pass calibration (collect activation ranges)
 # -------------------------------------------------------------------------
-assert isinstance(qlayer, QuantLinear)
+assert isinstance(qlayer.wrapped, QuantLinear)
 with torch.no_grad():
-    qlayer.enable_calibration()
     for _ in range(16):  # small toy batch
         x = torch.randn(4, 16)  # (batch=4, features=16)
         _ = model(x)
-    qlayer.freeze_qparams()  # lock scales & zero-points
+
+convert(qlayer)
 
 assert qlayer._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -17,13 +17,12 @@ import pathlib
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from tico.experimental.quantization.evaluation.metric import compute_peir
-from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
-    QuantLlamaAttention,
-)
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.evaluation.metric import compute_peir
+from tico.quantization.evaluation.utils import plot_two_outputs
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.wrappers.llama.quant_attn import QuantLlamaAttention
 from tico.utils.utils import SuppressWarning
 
 name = "Maykeye/TinyLLama-v0"
@@ -34,12 +33,11 @@ tokenizer = AutoTokenizer.from_pretrained(name)
 # 1. Replace layer-0’s MLP with QuantLlamaMLP
 # -------------------------------------------------------------------------
 orig_attn = model.model.layers[0].self_attn
-model.model.layers[0].self_attn = QuantLlamaAttention(
-    orig_attn
-)  # PTQWrapper(orig_attn) is also fine
+model.model.layers[0].self_attn = prepare(orig_attn, PTQConfig())
 model.eval()
 
 attn_q = model.model.layers[0].self_attn  # quant wrapper
+assert isinstance(attn_q.wrapped, QuantLlamaAttention)
 rotary = model.model.rotary_emb
 
 # -------------------------------------------------------------------------
@@ -55,7 +53,6 @@ PROMPTS = [
 ]
 
 with torch.no_grad():
-    attn_q.enable_calibration()
     for prompt in PROMPTS:
         ids = tokenizer(prompt, return_tensors="pt")
         embeds = model.model.embed_tokens(ids["input_ids"])
@@ -63,7 +60,8 @@ with torch.no_grad():
         S = cos_sin[0].shape[1]
         float_mask = torch.zeros(1, 1, S, S)
         _ = attn_q(embeds, cos_sin)  # observers collect
-    attn_q.freeze_qparams()
+
+convert(attn_q)
 
 assert attn_q._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -31,10 +31,12 @@ import pathlib
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from tico.experimental.quantization.evaluation.metric import compute_peir
-from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer import (
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.evaluation.metric import compute_peir
+from tico.quantization.evaluation.utils import plot_two_outputs
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.wrappers.llama.quant_decoder_layer import (
     QuantLlamaDecoderLayer,
 )
 from tico.utils.utils import SuppressWarning
@@ -50,12 +52,11 @@ rotary = model.model.rotary_emb  # RoPE helper
 # 1. Swap in the quant wrapper
 # -------------------------------------------------------------------------
 fp32_layer = model.model.layers[0]  # keep a reference for diff check
-model.model.layers[0] = QuantLlamaDecoderLayer(
-    fp32_layer
-)  # PTQWrapper(fp32_layer) is also fine
+model.model.layers[0] = prepare(fp32_layer, PTQConfig())
 model.eval()
 
 qlayer = model.model.layers[0]  # alias for brevity
+assert isinstance(qlayer.wrapped, QuantLlamaDecoderLayer)
 
 # -------------------------------------------------------------------------
 # 2. Single-pass calibration (gather activation ranges)
@@ -70,7 +71,6 @@ PROMPTS = [
 ]
 
 with torch.no_grad():
-    qlayer.enable_calibration()
     for prompt in PROMPTS:
         ids = tokenizer(prompt, return_tensors="pt")
         hidden = model.model.embed_tokens(ids["input_ids"])
@@ -78,7 +78,8 @@ with torch.no_grad():
         S = pos[0].shape[1]
         attn_mask = torch.zeros(1, 1, S, S)  # causal-mask placeholder
         _ = qlayer(hidden, attention_mask=attn_mask, position_embeddings=pos)
-    qlayer.freeze_qparams()
+
+convert(qlayer)
 
 assert qlayer._mode is Mode.QUANT, "Quantization mode should be active now."
 
@@ -18,13 +18,14 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 import tico
-from tico.experimental.quantization.evaluation.metric import compute_peir
-from tico.experimental.quantization.evaluation.utils import plot_two_outputs
-from tico.experimental.quantization.ptq.dtypes import INT16
-from tico.experimental.quantization.ptq.mode import Mode
-from tico.experimental.quantization.ptq.qscheme import QScheme
-from tico.experimental.quantization.ptq.quant_config import QuantConfig
-from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
+from tico.quantization import convert, prepare
+from tico.quantization.config.ptq import PTQConfig
+from tico.quantization.evaluation.metric import compute_peir
+from tico.quantization.evaluation.utils import plot_two_outputs
+from tico.quantization.wrapq.dtypes import INT16
+from tico.quantization.wrapq.mode import Mode
+from tico.quantization.wrapq.qscheme import QScheme
+from tico.quantization.wrapq.wrappers.llama.quant_mlp import QuantLlamaMLP
 from tico.utils.utils import SuppressWarning
 
 name = "Maykeye/TinyLLama-v0"
@@ -36,13 +37,13 @@ model.eval()
 # 1. Replace layer-0’s MLP with QuantLlamaMLP
 # -------------------------------------------------------------------------
 fp32_mlp = model.model.layers[0].mlp
-model.model.layers[0].mlp = QuantLlamaMLP(
-    fp32_mlp,
-    qcfg=QuantConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM),
-)  # PTQWrapper(fp32_mlp) is also fine
+model.model.layers[0].mlp = prepare(
+    fp32_mlp, PTQConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM)
+)
 model.eval()
 
 mlp_q = model.model.layers[0].mlp
+assert isinstance(mlp_q.wrapped, QuantLlamaMLP)
 
 # -------------------------------------------------------------------------
 # 2. Single-pass calibration
@@ -57,13 +58,12 @@ PROMPTS = [
 ]
 
 with torch.no_grad():
-    mlp_q.enable_calibration()
     for prompt in PROMPTS:
        enc = tokenizer(prompt, return_tensors="pt")
        emb = model.model.embed_tokens(enc["input_ids"])
        _ = mlp_q(emb)
 
-    mlp_q.freeze_qparams()
+convert(mlp_q)
 
 assert mlp_q._mode is Mode.QUANT, "Quantization mode should be active now."