tico 0.1.0.dev251013__py3-none-any.whl → 0.1.0.dev251014__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tico might be problematic. Click here for more details.

Files changed (28)
  1. tico/__init__.py +1 -1
  2. tico/experimental/quantization/{ptq/quant_config.py → config/ptq.py} +10 -5
  3. tico/experimental/quantization/evaluation/utils.py +1 -1
  4. tico/experimental/quantization/ptq/examples/compare_ppl.py +2 -2
  5. tico/experimental/quantization/ptq/examples/debug_quant_outputs.py +2 -2
  6. tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py +2 -2
  7. tico/experimental/quantization/ptq/examples/quantize_with_gptq.py +2 -2
  8. tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder.py +3 -3
  9. tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py +2 -2
  10. tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder.py +3 -3
  11. tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder_layer.py +2 -2
  12. tico/experimental/quantization/ptq/wrappers/fairseq/quant_mha.py +2 -2
  13. tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py +2 -2
  14. tico/experimental/quantization/ptq/wrappers/llama/quant_decoder_layer.py +3 -3
  15. tico/experimental/quantization/ptq/wrappers/llama/quant_mlp.py +2 -2
  16. tico/experimental/quantization/ptq/wrappers/nn/quant_layernorm.py +3 -2
  17. tico/experimental/quantization/ptq/wrappers/nn/quant_linear.py +3 -2
  18. tico/experimental/quantization/ptq/wrappers/nn/quant_silu.py +2 -2
  19. tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py +2 -2
  20. tico/experimental/quantization/ptq/wrappers/quant_elementwise.py +2 -2
  21. tico/experimental/quantization/ptq/wrappers/quant_module_base.py +8 -7
  22. tico/utils/convert.py +1 -0
  23. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/METADATA +2 -2
  24. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/RECORD +28 -28
  25. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/LICENSE +0 -0
  26. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/WHEEL +0 -0
  27. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/entry_points.txt +0 -0
  28. {tico-0.1.0.dev251013.dist-info → tico-0.1.0.dev251014.dist-info}/top_level.txt +0 -0
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
29
29
  ]
30
30
 
31
31
  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
32
- __version__ = "0.1.0.dev251013"
32
+ __version__ = "0.1.0.dev251014"
33
33
 
34
34
  MINIMUM_SUPPORTED_VERSION = "2.5.0"
35
35
  SECURE_TORCH_VERSION = "2.6.0"
@@ -15,6 +15,7 @@
15
15
  from dataclasses import dataclass, field
16
16
  from typing import Any, Dict, Mapping, Type
17
17
 
18
+ from tico.experimental.quantization.config.base import BaseConfig
18
19
  from tico.experimental.quantization.ptq.dtypes import DType
19
20
  from tico.experimental.quantization.ptq.observers.base import ObserverBase
20
21
  from tico.experimental.quantization.ptq.observers.minmax import MinMaxObserver
@@ -22,7 +23,7 @@ from tico.experimental.quantization.ptq.qscheme import QScheme
22
23
 
23
24
 
24
25
  @dataclass
25
- class QuantConfig:
26
+ class PTQConfig(BaseConfig):
26
27
  """
27
28
  One object describes the quantization preferences for a single wrapper
28
29
  and its descendants.
@@ -56,7 +57,7 @@ class QuantConfig:
56
57
  ```python
57
58
  from ptq.observers import PercentileObserver
58
59
 
59
- cfg = QuantConfig(
60
+ cfg = PTQConfig(
60
61
  default_dtype = DType.uint(8),
61
62
  default_qscheme = QScheme.PER_TENSOR_SYMM, # <- global scheme
62
63
  default_observer = PercentileObserver, # <- global algorithm
@@ -75,6 +76,10 @@ class QuantConfig:
75
76
  default_qscheme: QScheme = QScheme.PER_TENSOR_ASYMM
76
77
  overrides: Mapping[str, Mapping[str, Any]] = field(default_factory=dict)
77
78
 
79
+ @property
80
+ def name(self) -> str:
81
+ return "ptq"
82
+
78
83
  def get_kwargs(self, obs_name: str) -> Dict[str, Any]:
79
84
  """
80
85
  Return user-specified kwargs for *obs_name* inside **this** wrapper.
@@ -87,7 +92,7 @@ class QuantConfig:
87
92
  """
88
93
  return dict(self.overrides.get(obs_name, {}))
89
94
 
90
- def child(self, scope: str) -> "QuantConfig":
95
+ def child(self, scope: str) -> "PTQConfig":
91
96
  """
92
97
  Produce a *view* for a child wrapper.
93
98
 
@@ -100,7 +105,7 @@ class QuantConfig:
100
105
  Other scopes remain invisible to the child.
101
106
  """
102
107
  sub_overrides = self.overrides.get(scope, {})
103
- return QuantConfig(
108
+ return PTQConfig(
104
109
  self.default_dtype,
105
110
  self.default_observer,
106
111
  default_qscheme=self.default_qscheme,
@@ -108,4 +113,4 @@ class QuantConfig:
108
113
  )
109
114
 
110
115
  def __repr__(self):
111
- return f"QuantConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)})"
116
+ return f"PTQConfig(default_dtype={self.default_dtype}, default_observer={self.default_observer}, default_qscheme={self.default_qscheme}, overrides={dict(self.overrides)})"
@@ -44,7 +44,7 @@ def quantize(
44
44
  data = np.array(data)
45
45
  # Perfrom quantization
46
46
  if not scale:
47
- logger.warn("WARNING: scale value is 0. 1e-7 will be used instead.")
47
+ logger.warning("WARNING: scale value is 0. 1e-7 will be used instead.")
48
48
  scale = 1e-7
49
49
  rescaled = np.round(data / scale) + zero_point
50
50
  # Clamp the values
@@ -29,7 +29,7 @@ import tqdm
29
29
  from datasets import load_dataset
30
30
  from transformers import AutoModelForCausalLM, AutoTokenizer
31
31
 
32
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
32
+ from tico.experimental.quantization.config.ptq import PTQConfig
33
33
  from tico.experimental.quantization.ptq.utils.metrics import perplexity
34
34
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
35
35
 
@@ -165,7 +165,7 @@ def main():
165
165
  # ---------------------------------------------------------------------
166
166
  # 2. Wrap every Transformer layer with PTQWrapper
167
167
  # ---------------------------------------------------------------------
168
- qcfg = QuantConfig() # all-uint8 defaults
168
+ qcfg = PTQConfig() # all-uint8 defaults
169
169
 
170
170
  wrapped_layers = torch.nn.ModuleList()
171
171
  for idx, layer in enumerate(uint8_model.model.layers):
@@ -38,7 +38,7 @@ import tqdm
38
38
  from datasets import load_dataset
39
39
  from transformers import AutoModelForCausalLM, AutoTokenizer
40
40
 
41
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
41
+ from tico.experimental.quantization.config.ptq import PTQConfig
42
42
  from tico.experimental.quantization.ptq.utils.introspection import (
43
43
  build_fqn_map,
44
44
  compare_layer_outputs,
@@ -176,7 +176,7 @@ def main():
176
176
  # 2. Wrap every layer with PTQWrapper (UINT-8 activations)
177
177
  # -------------------------------------------------------------------------
178
178
  print("Wrapping layers with PTQWrapper …")
179
- qcfg = QuantConfig() # default: per-tensor UINT8
179
+ qcfg = PTQConfig() # default: per-tensor UINT8
180
180
 
181
181
  new_layers = torch.nn.ModuleList()
182
182
  for idx, fp_layer in enumerate(model.model.layers):
@@ -18,12 +18,12 @@ import torch
18
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
19
 
20
20
  import tico
21
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
22
  from tico.experimental.quantization.evaluation.metric import compute_peir
22
23
  from tico.experimental.quantization.evaluation.utils import plot_two_outputs
23
24
  from tico.experimental.quantization.ptq.dtypes import INT16
24
25
  from tico.experimental.quantization.ptq.mode import Mode
25
26
  from tico.experimental.quantization.ptq.qscheme import QScheme
26
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
27
27
  from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
28
28
  from tico.utils.utils import SuppressWarning
29
29
 
@@ -38,7 +38,7 @@ model.eval()
38
38
  fp32_mlp = model.model.layers[0].mlp
39
39
  model.model.layers[0].mlp = QuantLlamaMLP(
40
40
  fp32_mlp,
41
- qcfg=QuantConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM),
41
+ qcfg=PTQConfig(default_dtype=INT16, default_qscheme=QScheme.PER_TENSOR_SYMM),
42
42
  ) # PTQWrapper(fp32_mlp) is also fine
43
43
  model.eval()
44
44
 
@@ -35,8 +35,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
35
35
 
36
36
  from tico.experimental.quantization import convert, prepare
37
37
  from tico.experimental.quantization.config.gptq import GPTQConfig
38
+ from tico.experimental.quantization.config.ptq import PTQConfig
38
39
  from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
39
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
40
40
  from tico.experimental.quantization.ptq.utils.introspection import build_fqn_map
41
41
  from tico.experimental.quantization.ptq.utils.metrics import perplexity
42
42
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
@@ -219,7 +219,7 @@ def main():
219
219
  if not isinstance(layers, (list, torch.nn.ModuleList)):
220
220
  raise TypeError(f"'model.layers' must be list/ModuleList, got {type(layers)}")
221
221
 
222
- qcfg = QuantConfig() # default: per-tensor UINT8
222
+ qcfg = PTQConfig() # default: per-tensor UINT8
223
223
  wrapped = torch.nn.ModuleList()
224
224
  for idx, fp_layer in enumerate(layers):
225
225
  layer_cfg = qcfg.child(f"layer{idx}")
@@ -25,7 +25,7 @@ import torch
25
25
  import torch.nn.functional as F
26
26
  from torch import nn, Tensor
27
27
 
28
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
28
+ from tico.experimental.quantization.config.ptq import PTQConfig
29
29
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
30
30
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
31
31
  QuantModuleBase,
@@ -53,7 +53,7 @@ class QuantFairseqDecoder(QuantModuleBase):
53
53
  self,
54
54
  fp_decoder: nn.Module,
55
55
  *,
56
- qcfg: Optional[QuantConfig] = None,
56
+ qcfg: Optional[PTQConfig] = None,
57
57
  fp_name: Optional[str] = None,
58
58
  ):
59
59
  super().__init__(qcfg, fp_name=fp_name)
@@ -116,7 +116,7 @@ class QuantFairseqDecoder(QuantModuleBase):
116
116
 
117
117
  prefix = _safe_prefix(fp_name)
118
118
 
119
- # Prepare child QuantConfig namespaces: layers/<idx>
119
+ # Prepare child PTQConfig namespaces: layers/<idx>
120
120
  layers_qcfg = qcfg.child("layers") if qcfg else None
121
121
  for i, layer in enumerate(fp_layers):
122
122
  child_cfg = layers_qcfg.child(str(i)) if layers_qcfg else None
@@ -23,7 +23,7 @@ from typing import Dict, Iterable, List, Optional, Tuple
23
23
  import torch
24
24
  from torch import nn, Tensor
25
25
 
26
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
26
+ from tico.experimental.quantization.config.ptq import PTQConfig
27
27
  from tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha import (
28
28
  QuantFairseqMultiheadAttention,
29
29
  )
@@ -55,7 +55,7 @@ class QuantFairseqDecoderLayer(QuantModuleBase):
55
55
  self,
56
56
  fp_layer: nn.Module,
57
57
  *,
58
- qcfg: Optional[QuantConfig] = None,
58
+ qcfg: Optional[PTQConfig] = None,
59
59
  fp_name: Optional[str] = None,
60
60
  ):
61
61
  super().__init__(qcfg, fp_name=fp_name)
@@ -25,7 +25,7 @@ import torch
25
25
  import torch.nn as nn
26
26
  from torch import Tensor
27
27
 
28
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
28
+ from tico.experimental.quantization.config.ptq import PTQConfig
29
29
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
30
30
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
31
31
  QuantModuleBase,
@@ -56,7 +56,7 @@ class QuantFairseqEncoder(QuantModuleBase):
56
56
  self,
57
57
  fp_encoder: nn.Module,
58
58
  *,
59
- qcfg: Optional[QuantConfig] = None,
59
+ qcfg: Optional[PTQConfig] = None,
60
60
  fp_name: Optional[str] = None,
61
61
  use_external_inputs: bool = False, # export-mode flag
62
62
  return_type: Literal["tensor", "dict"] = "dict",
@@ -100,7 +100,7 @@ class QuantFairseqEncoder(QuantModuleBase):
100
100
  fp_layers = list(fp_encoder.layers) # type: ignore[arg-type]
101
101
  self.layers = nn.ModuleList()
102
102
 
103
- # Prepare child QuantConfig namespaces: layers/<idx>
103
+ # Prepare child PTQConfig namespaces: layers/<idx>
104
104
  layers_qcfg = qcfg.child("layers") if qcfg else None
105
105
  for i, layer in enumerate(fp_layers):
106
106
  child_cfg = layers_qcfg.child(str(i)) if layers_qcfg else None
@@ -23,7 +23,7 @@ from typing import Optional
23
23
  import torch.nn as nn
24
24
  from torch import Tensor
25
25
 
26
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
26
+ from tico.experimental.quantization.config.ptq import PTQConfig
27
27
  from tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha import (
28
28
  QuantFairseqMultiheadAttention,
29
29
  )
@@ -49,7 +49,7 @@ class QuantFairseqEncoderLayer(QuantModuleBase):
49
49
  self,
50
50
  fp_layer: nn.Module,
51
51
  *,
52
- qcfg: Optional[QuantConfig] = None,
52
+ qcfg: Optional[PTQConfig] = None,
53
53
  fp_name: Optional[str] = None,
54
54
  ):
55
55
  super().__init__(qcfg, fp_name=fp_name)
@@ -24,7 +24,7 @@ import torch
24
24
  import torch.nn as nn
25
25
  import torch.nn.functional as F
26
26
 
27
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
27
+ from tico.experimental.quantization.config.ptq import PTQConfig
28
28
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
29
29
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
30
30
  QuantModuleBase,
@@ -59,7 +59,7 @@ class QuantFairseqMultiheadAttention(QuantModuleBase):
59
59
  self,
60
60
  fp_attn: nn.Module,
61
61
  *,
62
- qcfg: Optional[QuantConfig] = None,
62
+ qcfg: Optional[PTQConfig] = None,
63
63
  fp_name: Optional[str] = None,
64
64
  max_seq: int = 4096,
65
65
  use_static_causal: bool = False,
@@ -17,7 +17,7 @@ from typing import Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
21
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
22
22
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
23
  QuantModuleBase,
@@ -34,7 +34,7 @@ class QuantLlamaAttention(QuantModuleBase):
34
34
  self,
35
35
  fp_attn: nn.Module,
36
36
  *,
37
- qcfg: Optional[QuantConfig] = None,
37
+ qcfg: Optional[PTQConfig] = None,
38
38
  fp_name: Optional[str] = None,
39
39
  ):
40
40
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,7 +17,7 @@ from typing import Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
21
  from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
22
22
  QuantLlamaAttention,
23
23
  )
@@ -56,7 +56,7 @@ class QuantLlamaDecoderLayer(QuantModuleBase):
56
56
  self,
57
57
  fp_layer: nn.Module,
58
58
  *,
59
- qcfg: Optional[QuantConfig] = None,
59
+ qcfg: Optional[PTQConfig] = None,
60
60
  fp_name: Optional[str] = None,
61
61
  return_type: Optional[str] = None,
62
62
  ):
@@ -165,7 +165,7 @@ class QuantLlamaDecoderLayer(QuantModuleBase):
165
165
  # - If use_cache: always return (hidden_states, present_key_value)
166
166
  # - Else: return as configured (tuple/tensor) for HF compatibility
167
167
  if use_cache:
168
- return hidden_states, present_key_value
168
+ return hidden_states, present_key_value # type: ignore[return-value]
169
169
 
170
170
  if self.return_type == "tuple":
171
171
  return (hidden_states,)
@@ -17,7 +17,7 @@ from typing import Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
21
  from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
22
22
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
23
  QuantModuleBase,
@@ -31,7 +31,7 @@ class QuantLlamaMLP(QuantModuleBase):
31
31
  self,
32
32
  mlp_fp: nn.Module,
33
33
  *,
34
- qcfg: Optional[QuantConfig] = None,
34
+ qcfg: Optional[PTQConfig] = None,
35
35
  fp_name: Optional[str] = None,
36
36
  ):
37
37
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,8 +17,9 @@ from typing import Iterable, Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
+
20
22
  from tico.experimental.quantization.ptq.mode import Mode
21
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
22
23
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
24
  QuantModuleBase,
24
25
  )
@@ -46,7 +47,7 @@ class QuantLayerNorm(QuantModuleBase):
46
47
  self,
47
48
  fp: nn.LayerNorm,
48
49
  *,
49
- qcfg: Optional[QuantConfig] = None,
50
+ qcfg: Optional[PTQConfig] = None,
50
51
  fp_name: Optional[str] = None
51
52
  ):
52
53
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,9 +17,10 @@ from typing import Optional
17
17
  import torch.nn as nn
18
18
  import torch.nn.functional as F
19
19
 
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
+
20
22
  from tico.experimental.quantization.ptq.mode import Mode
21
23
  from tico.experimental.quantization.ptq.qscheme import QScheme
22
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
23
24
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
24
25
  QuantModuleBase,
25
26
  )
@@ -34,7 +35,7 @@ class QuantLinear(QuantModuleBase):
34
35
  self,
35
36
  fp: nn.Linear,
36
37
  *,
37
- qcfg: Optional[QuantConfig] = None,
38
+ qcfg: Optional[PTQConfig] = None,
38
39
  fp_name: Optional[str] = None
39
40
  ):
40
41
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,7 +17,7 @@ from typing import Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
21
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
22
  QuantModuleBase,
23
23
  )
@@ -37,7 +37,7 @@ class QuantSiLU(QuantModuleBase):
37
37
  self,
38
38
  fp: nn.SiLU,
39
39
  *,
40
- qcfg: Optional[QuantConfig] = None,
40
+ qcfg: Optional[PTQConfig] = None,
41
41
  fp_name: Optional[str] = None
42
42
  ):
43
43
  super().__init__(qcfg, fp_name=fp_name)
@@ -16,7 +16,7 @@ from typing import Optional
16
16
 
17
17
  import torch
18
18
 
19
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
19
+ from tico.experimental.quantization.config.ptq import PTQConfig
20
20
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
21
21
  QuantModuleBase,
22
22
  )
@@ -34,7 +34,7 @@ class PTQWrapper(QuantModuleBase):
34
34
  def __init__(
35
35
  self,
36
36
  module: torch.nn.Module,
37
- qcfg: Optional[QuantConfig] = None,
37
+ qcfg: Optional[PTQConfig] = None,
38
38
  *,
39
39
  fp_name: Optional[str] = None,
40
40
  ):
@@ -17,7 +17,7 @@ from typing import Callable, Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
21
  from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
22
  QuantModuleBase,
23
23
  )
@@ -48,7 +48,7 @@ class QuantElementwise(QuantModuleBase):
48
48
  self,
49
49
  fp_module: nn.Module,
50
50
  *,
51
- qcfg: Optional[QuantConfig] = None,
51
+ qcfg: Optional[PTQConfig] = None,
52
52
  fp_name: Optional[str] = None,
53
53
  ):
54
54
  super().__init__(qcfg, fp_name=fp_name)
@@ -17,9 +17,10 @@ from typing import Iterable, Optional, Tuple
17
17
 
18
18
  import torch.nn as nn
19
19
 
20
+ from tico.experimental.quantization.config.ptq import PTQConfig
21
+
20
22
  from tico.experimental.quantization.ptq.mode import Mode
21
23
  from tico.experimental.quantization.ptq.observers.base import ObserverBase
22
- from tico.experimental.quantization.ptq.quant_config import QuantConfig
23
24
 
24
25
 
25
26
  class QuantModuleBase(nn.Module, ABC):
@@ -29,7 +30,7 @@ class QuantModuleBase(nn.Module, ABC):
29
30
  Responsibilities
30
31
  ----------------
31
32
  • Own *one* Mode enum (`NO_QUANT / CALIB / QUANT`)
32
- • Own a QuantConfig describing default / per-observer dtypes
33
+ • Own a PTQConfig describing default / per-observer dtypes
33
34
  • Expose a canonical lifecycle:
34
35
  enable_calibration()
35
36
  freeze_qparams()
@@ -38,10 +39,10 @@ class QuantModuleBase(nn.Module, ABC):
38
39
  """
39
40
 
40
41
  def __init__(
41
- self, qcfg: Optional[QuantConfig] = None, *, fp_name: Optional[str] = None
42
+ self, qcfg: Optional[PTQConfig] = None, *, fp_name: Optional[str] = None
42
43
  ) -> None:
43
44
  super().__init__()
44
- self.qcfg = qcfg or QuantConfig()
45
+ self.qcfg = qcfg or PTQConfig()
45
46
  self._mode: Mode = Mode.NO_QUANT # default state
46
47
  self.fp_name = fp_name
47
48
 
@@ -118,9 +119,9 @@ class QuantModuleBase(nn.Module, ABC):
118
119
  Instantiate an observer named *name*.
119
120
 
120
121
  Precedence (3-tier) for keys:
121
- • observer: user > wrapper-default > QuantConfig.default_observer
122
- • dtype: user > wrapper-default > QuantConfig.default_dtype
123
- • qscheme: user > wrapper-default > QuantConfig.default_qscheme
122
+ • observer: user > wrapper-default > PTQConfig.default_observer
123
+ • dtype: user > wrapper-default > PTQConfig.default_dtype
124
+ • qscheme: user > wrapper-default > PTQConfig.default_qscheme
124
125
 
125
126
  Other kwargs (e.g., qscheme, channel_axis, etc.) remain:
126
127
  user override > wrapper-default
tico/utils/convert.py CHANGED
@@ -143,6 +143,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
143
143
  or torch.__version__.startswith("2.7")
144
144
  or torch.__version__.startswith("2.8")
145
145
  or torch.__version__.startswith("2.9")
146
+ or torch.__version__.startswith("2.10")
146
147
  ):
147
148
  return run_decompositions(exported_program)
148
149
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tico
3
- Version: 0.1.0.dev251013
3
+ Version: 0.1.0.dev251014
4
4
  Summary: Convert exported Torch module to circle
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -68,7 +68,7 @@ This will generate `build` and `dist` directories in the root directory.
68
68
  **Available options**
69
69
  - `--dist` To install the package from .whl (without this option, _TICO_ is installed in an editable mode)
70
70
  - `--torch_ver <torch version>` To install a specific torch version (default: 2.6).
71
- - Available <torch version>: 2.5, 2.6, 2.7, nightly
71
+ - Available <torch version>: 2.5, 2.6, 2.7, 2.8, nightly
72
72
 
73
73
  4. Now you can convert a torch module to a `.circle`.
74
74
 
@@ -1,4 +1,4 @@
1
- tico/__init__.py,sha256=U-GkjWKgZwkndQ10KSKpe0dF_BMQeunWF7ktsiz0FGQ,1883
1
+ tico/__init__.py,sha256=-_cU2NnQ66r8uJLRWR_AzjQdiPVMOTGKMriQY1yrnBA,1883
2
2
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
3
3
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
4
4
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
@@ -44,12 +44,13 @@ tico/experimental/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3
44
44
  tico/experimental/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
45
45
  tico/experimental/quantization/config/gptq.py,sha256=IUIEz5bLhsTXqoBCE1rfPec99zsRjwgpDbPW5YJqOPg,973
46
46
  tico/experimental/quantization/config/pt2e.py,sha256=9HCrraTGGZeKEN9puKV-ODi7ncV2Wjc3oe_JCO1D_Rs,850
47
+ tico/experimental/quantization/config/ptq.py,sha256=uloDu-BKLJ9RussCmoLsw0Wq41zdk_iKsjdi_xqOn30,4431
47
48
  tico/experimental/quantization/config/smoothquant.py,sha256=b92dz4-MiBbkaLzXb47bVoO29d2P416woFQUZ1wpO_s,1414
48
49
  tico/experimental/quantization/evaluation/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
49
50
  tico/experimental/quantization/evaluation/backend.py,sha256=CZL9rZOA0t8cH7PHp6u9l7dGqWNvTj9bKOvwo0PVul0,692
50
51
  tico/experimental/quantization/evaluation/evaluate.py,sha256=kfa_GvFaX6DoSTAmuCImMJqF2jgqtnor5UpC7wVmGPI,7877
51
52
  tico/experimental/quantization/evaluation/metric.py,sha256=t9M058dOQ8iy_2PcrbNMAebBNJs8TU8USZw_nbi2iWI,5488
52
- tico/experimental/quantization/evaluation/utils.py,sha256=82RG_e5LuKfWo786wEZUVwXY93nNl901n04fB7D0Z6k,5909
53
+ tico/experimental/quantization/evaluation/utils.py,sha256=n4Im3FiIVG3oVjB-wtIwV-0GUs24E6zS6Vc_cBnN5QQ,5912
53
54
  tico/experimental/quantization/evaluation/executor/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
54
55
  tico/experimental/quantization/evaluation/executor/backend_executor.py,sha256=3kLu3_rcsreA_NK42yRgRgubPtZmVp7QCRvaqLNw10E,1522
55
56
  tico/experimental/quantization/evaluation/executor/circle_executor.py,sha256=8o2Cb1fewPMtYd6jrn3nKHnSTGs7FOw-vD5_DZ2FVlE,2745
@@ -65,15 +66,14 @@ tico/experimental/quantization/ptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3oux
65
66
  tico/experimental/quantization/ptq/dtypes.py,sha256=xfCBtq6mQmUYRwsoFgII6gvRl1raQi0Inj9pznDuKwQ,2236
66
67
  tico/experimental/quantization/ptq/mode.py,sha256=lT-T8vIv8YWcwrjT7xXVhOw1g7aoAdh_3PWB-ptPKaI,1052
67
68
  tico/experimental/quantization/ptq/qscheme.py,sha256=uwhv7bCxOOXB3I-IKlRyr_u4eXOq48uIqGy4TLDqGxY,1301
68
- tico/experimental/quantization/ptq/quant_config.py,sha256=nm7570Y1X2mOT_8s27ilWid04otor6cVTi9GwgAEaKc,4300
69
69
  tico/experimental/quantization/ptq/examples/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
70
- tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=eVQn8-M24QkoCy_FCBQLSlUrnyqUDSkvUFpUpEdpMx4,8265
71
- tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=Hpx_jj46WISwdVp33NrIadizVAzf1nFTXuAcHsKEQuQ,8179
70
+ tico/experimental/quantization/ptq/examples/compare_ppl.py,sha256=2we7u4WgeK1NbYa31T2irUWE-RbTPUxRBdFtc9mPccY,8255
71
+ tico/experimental/quantization/ptq/examples/debug_quant_outputs.py,sha256=GviYQa3MZ0-nKTKRaRsPXRI24VtWvDL3uOhOqsqxniY,8169
72
72
  tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYgam0xQ-PbC6Xb1I7W1mv0Wi-b--IP2wwXtw,4539
73
73
  tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
74
74
  tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
75
- tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=N1qZQgt1S-xZrdv-PW7OfXEcv0gsO2q9faOF4aD-zKo,4147
76
- tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=y-SK56j4wL-9j-0jtuOqQUq4CElZtGOETp-Tg4XivUI,10438
75
+ tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=poP-TFmsP_Iy3K6NEu6f8UmHInaCX3wUSFZWhhqoUCQ,4137
76
+ tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=OqlaegX7ySR2PN6mSOZjcfKdxzrDe3gT_jBJg9HMzvM,10428
77
77
  tico/experimental/quantization/ptq/observers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
78
78
  tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
79
79
  tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
@@ -86,25 +86,25 @@ tico/experimental/quantization/ptq/utils/introspection.py,sha256=y2oGf7RoApMHJeX
86
86
  tico/experimental/quantization/ptq/utils/metrics.py,sha256=EW_FQmJrl9Y4esspZQ0GHfJ58RwuJUz0l8IfYq3NWY4,4461
87
87
  tico/experimental/quantization/ptq/utils/reduce_utils.py,sha256=3kWawLB91EcvvHlCrNqqfZF7tpgr22htBSA049mKw_4,973
88
88
  tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
89
- tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
90
- tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=LhEoobfvto6zKrBOKL4gmxfFFc31jHzyQV_zfps-iQM,3604
91
- tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=vkcDos_knGSS29rIZuEIWkAJLHrENbGz8nCH2-iara8,5969
89
+ tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=5JgLwO6C3LMjVPmbZvl7cJcsJYKno91YPJZHl4HT1GQ,2432
90
+ tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=A9XYDPYqEnFmE-FGu1pS6V3LetqLr6xNXgo2TeoiF1c,3594
91
+ tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=2F9NVvUP8Bs4Rs_XIPsQDUmLh-s4ZSgfNWwlXi-jLh4,5950
92
92
  tico/experimental/quantization/ptq/wrappers/registry.py,sha256=OVO5nev6J8Br9zsIX-Ut7ZgWzA9f_jk0Np9bGioXgQM,5171
93
93
  tico/experimental/quantization/ptq/wrappers/fairseq/__init__.py,sha256=Mc8FLd9DusyB_IT1vk1OYrRkngOYnYd05IvtA9ORVQc,160
94
94
  tico/experimental/quantization/ptq/wrappers/fairseq/decoder_export_single_step.py,sha256=d7ZieKiSbZ2ffkaLYMg2PJl1OyAxkKjB3OHKB4poxJs,9796
95
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder.py,sha256=CILYvxPhW7xLkroWW_hunQBGAYGexLqnPnO5xmMnK-E,17877
96
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py,sha256=JT79shxOhDtRFgm8jrrN6HKvyVotiytLjMjAxX-Cztg,20416
97
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder.py,sha256=r9DPUAbL2KRJ8zpMJ39Y9n6Oe79nte-mFcdjG2qEP-w,13809
98
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder_layer.py,sha256=aGr80Ku75j2H-UZ0elEa0mOQEyaAs2YJ4WJCN0lonn0,6412
99
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_mha.py,sha256=HsigmOLeacLXc46QNeFqwQ0DwKQhNrtWTKEtLJoqXoc,15562
95
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder.py,sha256=Lu6jBX1xUvpof3Q4YRh_FdwjKYbioKjvbtQQyBlA4bg,17865
96
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py,sha256=r9-Ws-MhAgO7pXFAx_pFy42hT2syMS54cPdtsreVq_c,20406
97
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder.py,sha256=LtPJGk8avTWSNCTvzA5J88mcdT27Cg9HLmwCjrj8Lp0,13797
98
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder_layer.py,sha256=O-rkqWnTEzYG39bg9xNB3RH6T8I-cZyLgAwfWWBXFBk,6402
99
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_mha.py,sha256=ek6lq5J0lAIpIlVQa4vri-7_Qe6Z38jE0A-lzS9UinE,15552
100
100
  tico/experimental/quantization/ptq/wrappers/llama/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
101
- tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py,sha256=futw-XhAhErdaK2cZY8T3_xCxZbsj-l1dbsSbeunE_4,10403
102
- tico/experimental/quantization/ptq/wrappers/llama/quant_decoder_layer.py,sha256=ZImtfT2pyYyGJa0QCcHgCVootiWeflpRvLa4LisjZSY,7646
103
- tico/experimental/quantization/ptq/wrappers/llama/quant_mlp.py,sha256=uZMnrX66oZwxhKhcNbLXXeri-WxxRBiZnr15aBXJMm0,3562
101
+ tico/experimental/quantization/ptq/wrappers/llama/quant_attn.py,sha256=B7SRKXclY1mdt31Yxbe8WLCxuCOTo8LVsgLQFVSrX7A,10393
102
+ tico/experimental/quantization/ptq/wrappers/llama/quant_decoder_layer.py,sha256=hQRoccrsop4f0dj_y1sIRPcNOw2h4hBB8nZoBWpxB50,7666
103
+ tico/experimental/quantization/ptq/wrappers/llama/quant_mlp.py,sha256=SVQsv4qwa48rLP_080YlxeEZlMkynvsFgkfLJ__F2uM,3552
104
104
  tico/experimental/quantization/ptq/wrappers/nn/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
105
- tico/experimental/quantization/ptq/wrappers/nn/quant_layernorm.py,sha256=G5Sgt-tXnzh0Rxyk-2honmZIfEQOZlRfOsoDBdSGmA4,6887
106
- tico/experimental/quantization/ptq/wrappers/nn/quant_linear.py,sha256=xW-VEPB7RJoslS3xLVCdhIuMjppknvpkZleRGK4JFVQ,2240
107
- tico/experimental/quantization/ptq/wrappers/nn/quant_silu.py,sha256=XnJDggkWUTfXC1-BLeAbcCUtp687XLIkIIbuQlqycDw,1864
105
+ tico/experimental/quantization/ptq/wrappers/nn/quant_layernorm.py,sha256=-lhH2iR8kZZw89ejMg8i6MnFwE1RaP--xNzgiTFhWGY,6878
106
+ tico/experimental/quantization/ptq/wrappers/nn/quant_linear.py,sha256=F2RN_lNStUwi6NQ9HOz0pdWerfMCK5JYt1NTTke49L4,2231
107
+ tico/experimental/quantization/ptq/wrappers/nn/quant_silu.py,sha256=bIuMbAARoXhTdusPhchqj3DAL8dno8xO1mEMNhSF9OM,1854
108
108
  tico/interpreter/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
109
109
  tico/interpreter/infer.py,sha256=1ZFe3DVMR2mlwBosoedqoL0-CGN_01CKLgMgxuw62KA,4861
110
110
  tico/interpreter/interpreter.py,sha256=tGbluCbrehTCqBu8mtGDNzby_ieJ2ry8_RH_eC0CQxk,3828
@@ -238,7 +238,7 @@ tico/serialize/operators/utils.py,sha256=lXGpEJW1h8U_-gfc6EWjvvSiq3yJ9P-v1v3EMRT
238
238
  tico/serialize/operators/adapters/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
239
239
  tico/serialize/operators/adapters/llama_rmsnorm.py,sha256=6t3dhfNpR03eIjsmhymF2JKd6lCf7PvInqMf77c_BOE,1139
240
240
  tico/utils/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
241
- tico/utils/convert.py,sha256=10YufXpuqHz274ACUb1_F5594uClUFhBEh8SY6gYp7w,13809
241
+ tico/utils/convert.py,sha256=ylEnz_1jrvWTVEItRw9xCwaMhnbjp6TSrePbXiiccbc,13857
242
242
  tico/utils/define.py,sha256=Ypgp7YffM4pgPl4Zh6TmogSn1OxGBMRw_e09qYGflZk,1467
243
243
  tico/utils/diff_graph.py,sha256=_eDGGPDPYQD4b--MXX0DLoVgSt_wLfNPt47UlolLLR4,5272
244
244
  tico/utils/dtype.py,sha256=L5Qb7qgbt0eQ5frUTvHYrRtTJb1dg4-JNEopcxCNg1U,1389
@@ -262,9 +262,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
262
262
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
263
263
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
264
264
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
265
- tico-0.1.0.dev251013.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
266
- tico-0.1.0.dev251013.dist-info/METADATA,sha256=gxBnCYAGBeSVsMkXVBKj0fDqApxxDeM5lIDdM_0ziBk,8450
267
- tico-0.1.0.dev251013.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
268
- tico-0.1.0.dev251013.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
269
- tico-0.1.0.dev251013.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
270
- tico-0.1.0.dev251013.dist-info/RECORD,,
265
+ tico-0.1.0.dev251014.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
266
+ tico-0.1.0.dev251014.dist-info/METADATA,sha256=CrvKqaj02su0UWPLp4FUbHTBRBjna1DZeM6aKopnlhQ,8455
267
+ tico-0.1.0.dev251014.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
268
+ tico-0.1.0.dev251014.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
269
+ tico-0.1.0.dev251014.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
270
+ tico-0.1.0.dev251014.dist-info/RECORD,,