tico 0.1.0.dev251106__py3-none-any.whl → 0.2.0.dev260122__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in the public registry.
Files changed (56)
  1. tico/__init__.py +2 -2
  2. tico/_version.py +1 -0
  3. tico/passes/convert_conv3d_to_conv2d.py +435 -0
  4. tico/passes/convert_sym_size_to_circle_shape.py +99 -0
  5. tico/passes/decompose_batch_norm.py +9 -5
  6. tico/passes/lower_copy.py +95 -0
  7. tico/passes/ops.py +4 -0
  8. tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +251 -0
  9. tico/quantization/algorithm/fpi_gptq/quantizer.py +180 -0
  10. tico/quantization/algorithm/gptq/gptq.py +231 -11
  11. tico/quantization/algorithm/gptq/quantizer.py +18 -6
  12. tico/quantization/config/{pt2e.py → fpi_gptq.py} +11 -4
  13. tico/quantization/config/gptq.py +27 -4
  14. tico/quantization/public_interface.py +0 -10
  15. tico/quantization/wrapq/quantizer.py +2 -0
  16. tico/quantization/wrapq/wrappers/quant_elementwise.py +51 -11
  17. tico/serialize/operators/adapters/onert/llama_attention.py +51 -0
  18. tico/serialize/operators/op_attention.py +58 -0
  19. tico/serialize/operators/op_circle_shape.py +64 -0
  20. tico/serialize/operators/op_dequantize_per_channel.py +1 -0
  21. tico/serialize/operators/op_dequantize_per_tensor.py +1 -0
  22. tico/serialize/operators/op_transpose_conv.py +66 -50
  23. tico/utils/convert.py +16 -1
  24. tico/utils/padding.py +13 -5
  25. tico/utils/record_input.py +2 -2
  26. tico/utils/register_custom_op.py +63 -0
  27. tico/utils/validate_args_kwargs.py +49 -4
  28. tico-0.2.0.dev260122.dist-info/METADATA +631 -0
  29. {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/RECORD +35 -46
  30. {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/WHEEL +1 -1
  31. {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/entry_points.txt +0 -1
  32. tico/quantization/algorithm/pt2e/annotation/annotator.py +0 -208
  33. tico/quantization/algorithm/pt2e/annotation/config.py +0 -26
  34. tico/quantization/algorithm/pt2e/annotation/op/__init__.py +0 -21
  35. tico/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +0 -63
  36. tico/quantization/algorithm/pt2e/annotation/op/add.py +0 -55
  37. tico/quantization/algorithm/pt2e/annotation/op/conv2d.py +0 -90
  38. tico/quantization/algorithm/pt2e/annotation/op/div.py +0 -55
  39. tico/quantization/algorithm/pt2e/annotation/op/linear.py +0 -92
  40. tico/quantization/algorithm/pt2e/annotation/op/mean.py +0 -51
  41. tico/quantization/algorithm/pt2e/annotation/op/mul.py +0 -55
  42. tico/quantization/algorithm/pt2e/annotation/op/relu6.py +0 -51
  43. tico/quantization/algorithm/pt2e/annotation/op/rsqrt.py +0 -51
  44. tico/quantization/algorithm/pt2e/annotation/op/sub.py +0 -55
  45. tico/quantization/algorithm/pt2e/annotation/spec.py +0 -45
  46. tico/quantization/algorithm/pt2e/annotation/utils.py +0 -88
  47. tico/quantization/algorithm/pt2e/quantizer.py +0 -81
  48. tico/quantization/algorithm/pt2e/transformation/__init__.py +0 -1
  49. tico/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -58
  50. tico/quantization/algorithm/pt2e/utils.py +0 -135
  51. tico/serialize/operators/op_copy.py +0 -187
  52. tico-0.1.0.dev251106.dist-info/METADATA +0 -392
  53. /tico/quantization/algorithm/{pt2e → fpi_gptq}/__init__.py +0 -0
  54. /tico/{quantization/algorithm/pt2e/annotation → serialize/operators/adapters/onert}/__init__.py +0 -0
  55. {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info/licenses}/LICENSE +0 -0
  56. {tico-0.1.0.dev251106.dist-info → tico-0.2.0.dev260122.dist-info}/top_level.txt +0 -0
tico/quantization/algorithm/pt2e/quantizer.py
@@ -1,81 +0,0 @@
- # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from typing import Any, Dict, Optional
-
- import torch
-
- from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
-
- from tico.quantization.algorithm.pt2e.annotation.annotator import (
-     get_asymmetric_quantization_config,
-     PT2EAnnotator,
- )
- from tico.quantization.config.pt2e import PT2EConfig
- from tico.quantization.quantizer import BaseQuantizer
- from tico.quantization.quantizer_registry import register_quantizer
-
-
- @register_quantizer(PT2EConfig)
- class PT2EQuantizer(BaseQuantizer):
-     """
-     Quantizer for applying pytorch 2.0 export quantization (typically for activation quantization).
-     """
-
-     def prepare(
-         self,
-         model: torch.nn.Module,
-         args: Optional[Any] = None,
-         kwargs: Optional[Dict[str, Any]] = None,
-     ):
-         """
-         Prepare the model for pt2e quantization.
-
-         Registers activation observers using the provided example inputs.
-
-         Parameters:
-             model: The target PyTorch model.
-             args: Positional example inputs required for capturing graph.
-             kwargs: Keyword example inputs required for capturing graph.
-
-         Returns:
-             The model prepared for pt2e quantization.
-         """
-         # Program capture
-         assert isinstance(args, tuple)
-         model = torch.export.export_for_training(
-             model, args=args, kwargs=kwargs
-         ).module()
-         quantizer = PT2EAnnotator()
-         quantizer = quantizer.set_global(get_asymmetric_quantization_config())
-
-         # Register observers in each nodes
-         assert isinstance(model, torch.fx.GraphModule)
-         model = prepare_pt2e(model, quantizer)
-
-         return model
-
-     def convert(self, model: torch.fx.GraphModule):
-         """
-         Convert the prepared model to its pt2e quantized version.
-
-         Applies the pt2e quantization on activations based on the collected statistics.
-
-         Parameters:
-             model: The prepared PyTorch model.
-
-         Returns:
-             The quantized model.
-         """
-         return convert_pt2e(model)
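For context, the deleted PT2EQuantizer was a thin wrapper around PyTorch's public PT2E flow: capture with torch.export.export_for_training, annotate, insert observers with prepare_pt2e, calibrate, then convert_pt2e. Below is a minimal sketch of that flow using only public torch.ao APIs; the example model, inputs, calibration loop, and the XNNPACKQuantizer stand-in (the deleted code used tico's own PT2EAnnotator with an asymmetric config) are illustrative assumptions, not part of tico.

```python
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU()).eval()
example_inputs = (torch.randn(1, 4),)

# 1) Program capture, as the deleted prepare() did with export_for_training.
captured = torch.export.export_for_training(model, example_inputs).module()

# 2) Annotate and insert observers. The deleted code used tico's PT2EAnnotator
#    with an asymmetric config; XNNPACKQuantizer is only a public stand-in here.
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
prepared = prepare_pt2e(captured, quantizer)

# 3) Calibrate with representative inputs so the observers collect statistics.
for _ in range(8):
    prepared(torch.randn(1, 4))

# 4) Convert to the quantized graph, as the deleted convert() did.
quantized = convert_pt2e(prepared)
```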
tico/quantization/algorithm/pt2e/transformation/__init__.py
@@ -1 +0,0 @@
- # DO NOT REMOVE THIS FILE
tico/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py
@@ -1,58 +0,0 @@
- # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- import torch
- from torch.ao.quantization.fx.utils import get_new_attr_name_with_prefix
-
-
- def convert_scalars_to_attrs(model: torch.fx.GraphModule) -> torch.fx.GraphModule:
-     """
-     Convert scalar values in the graph to `get_attr` nodes.
-
-     This function identifies scalar constants in the graph and transforms them
-     into `get_attr` nodes to ensure compatibility with quantization workflows.
-     """
-     for n in model.graph.nodes:
-         if n.op != "call_function" or n.target not in [
-             # The operators that have scalar parameters.
-             torch.ops.aten.add.Tensor,
-         ]:
-             continue
-         args = list(n.args)
-         new_args = []
-         for arg in args:
-             if isinstance(arg, torch.fx.Node):
-                 new_args.append(arg)
-                 continue
-
-             assert isinstance(arg, float)
-             prefix = "_tensor_constant_"
-             get_new_attr_name = get_new_attr_name_with_prefix(prefix)
-             tensor_constant_name = get_new_attr_name(model)
-             float_tensor = torch.tensor(float(arg))
-             model.register_buffer(tensor_constant_name, float_tensor)
-
-             fake_mode = n.meta["val"].fake_mode
-             with model.graph.inserting_before(n):
-                 get_attr_node = model.graph.create_node(
-                     "get_attr", tensor_constant_name, (), {}
-                 )
-                 get_attr_node.meta["val"] = fake_mode.from_tensor(
-                     float_tensor, static_shapes=True
-                 )
-             new_args.append(get_attr_node)
-         n.args = tuple(new_args)
-     model.recompile()
-
-     return model
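The deleted pass above targets operators such as aten.add.Tensor whose second argument can be a bare Python float; it registers that float as a buffer and rewires the argument to a get_attr node so PT2E observers can be attached to it. A minimal sketch of the pattern it rewrites is shown below; the model and names are illustrative and use only public torch.export APIs.

```python
import torch

class AddScalar(torch.nn.Module):
    def forward(self, x):
        return x + 2.0  # captured as aten.add.Tensor(x, 2.0) with a plain float argument

gm = torch.export.export_for_training(AddScalar(), (torch.randn(2),)).module()
print(gm.graph)
# Before the pass: the add node's second argument is the Python float 2.0.
# After convert_scalars_to_attrs (on the old tico wheel), that float would be
# registered as a buffer (prefix "_tensor_constant_", per the deleted code) and
# referenced through a get_attr node, like any other tensor input.
```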
tico/quantization/algorithm/pt2e/utils.py
@@ -1,135 +0,0 @@
- # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from typing import Callable, List, Optional, TYPE_CHECKING
-
- if TYPE_CHECKING:
-     import torch.fx
- import torch
- from torch.ao.quantization.quantizer import QuantizationSpec
- from torch.ao.quantization.quantizer.utils import _get_module_name_filter
-
- from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
-
-
- def get_module_type_filter(tp: Callable):
-     """
-     Get the module_type_filter function for a given module type.
-
-     The filter accepts a node and checks if the node comes from a module
-     that has certain module type.
-
-     For example:
-         node: linear_op = call_function[...](...) # comes from a module with type Block -> Sub -> Linear
-
-
-     >> module_type_filter = get_module_type_filter(Sub) # submodule with type `Sub`, under the `Block` submodule
-     >> print(module_type_filter(node))
-     True # the node is from the submodule `Sub`
-     """
-
-     tp_str = tp.__module__ + "." + tp.__qualname__
-
-     def module_type_filter(n: torch.fx.Node) -> bool:
-         # example: {
-         #     'L__self___sub': ("L['self'].sub", <class '....Sub'>),
-         #     'L__self___sub_linear': ("L['self'].sub.linear", <class 'torch.nn.modules.linear.Linear'>)
-         # }
-         nn_module_stack = n.meta.get("nn_module_stack", {})
-         types = []
-         for _, t in nn_module_stack.values():
-             # export() returns str, but older APIs (e.g. capture_pre_autograd_graph)
-             # return type. Handle both cases.
-             if isinstance(t, type):
-                 t = t.__module__ + "." + t.__qualname__
-             types.append(t)
-         return tp_str in types
-
-     return module_type_filter
-
-
- def get_not_module_type_or_name_filter(
-     tp_list: List[Callable], module_name_list: List[str]
- ) -> Callable[[torch.fx.Node], bool]:
-     module_type_filters = [get_module_type_filter(tp) for tp in tp_list]
-     module_name_list_filters = [_get_module_name_filter(m) for m in module_name_list]
-
-     def not_module_type_or_name_filter(n: torch.fx.Node) -> bool:
-         return not any(f(n) for f in module_type_filters + module_name_list_filters)
-
-     return not_module_type_or_name_filter
-
-
- def get_input_act_qspec(quantization_config: Optional[QuantizationConfig]):
-     if quantization_config is None:
-         return None
-     if quantization_config.input_activation is None:
-         return None
-     quantization_spec: QuantizationSpec = quantization_config.input_activation
-     assert quantization_spec.qscheme in [
-         torch.per_tensor_affine,
-     ]
-     return quantization_spec
-
-
- def get_output_act_qspec(quantization_config: Optional[QuantizationConfig]):
-     if quantization_config is None:
-         return None
-     if quantization_config.output_activation is None:
-         return None
-     quantization_spec: QuantizationSpec = quantization_config.output_activation
-     assert quantization_spec.qscheme in [
-         torch.per_tensor_affine,
-     ]
-     return quantization_spec
-
-
- def get_weight_qspec(quantization_config: Optional[QuantizationConfig]):
-     if quantization_config is None:
-         return None
-     if quantization_config.weight is None:
-         return None
-     quantization_spec: QuantizationSpec = quantization_config.weight
-     if quantization_spec.qscheme not in [
-         torch.per_tensor_affine,
-         torch.per_channel_affine,
-     ]:
-         raise ValueError(
-             f"Unsupported quantization_spec {quantization_spec} for weight"
-         )
-     return quantization_spec
-
-
- def get_bias_qspec(quantization_config: Optional[QuantizationConfig]):
-     if quantization_config is None:
-         return None
-     if quantization_config.bias is None:
-         return None
-     quantization_spec: QuantizationSpec = quantization_config.bias
-     return quantization_spec
-
-
- def is_annotated(nodes: List[torch.fx.Node] | torch.fx.Node):
-     """
-     Check if any of the node in the given list is annotated.
-     """
-     annotated = False
-     if isinstance(nodes, torch.fx.Node):
-         nodes = [nodes]
-     for node in nodes:
-         annotated = annotated or (
-             "quantization_annotation" in node.meta
-             and node.meta["quantization_annotation"]._annotated
-         )
-     return annotated
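The deleted module-type filter works off the nn_module_stack metadata that torch.export records on each node, as the inline comment in the removed code shows. Below is a small sketch of what that metadata looks like; the Block/Sub model is illustrative and only public torch.export APIs are used.

```python
import torch

class Sub(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)

class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.sub = Sub()

    def forward(self, x):
        return self.sub(x)

gm = torch.export.export_for_training(Block(), (torch.randn(1, 4),)).module()
for n in gm.graph.nodes:
    if n.op == "call_function":
        # Each entry maps the node back to the module hierarchy it came from,
        # e.g. values like ("sub", ...Sub) and ("sub.linear", torch.nn.Linear) or
        # their string forms; the deleted filter normalizes and matches these.
        print(n.target, n.meta.get("nn_module_stack", {}))
```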
tico/serialize/operators/op_copy.py
@@ -1,187 +0,0 @@
- # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- from typing import Dict, List, Optional, TYPE_CHECKING, Union
-
- if TYPE_CHECKING:
-     import torch._ops
-     import torch.fx
- import torch
- from circle_schema import circle
-
- from tico.serialize.circle_graph import CircleSubgraph
- from tico.serialize.operators.hashable_opcode import OpCode
- from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
- from tico.serialize.operators.utils import create_builtin_operator, get_op_index
- from tico.utils.errors import NotYetSupportedError
- from tico.utils.validate_args_kwargs import CopyArgs
-
-
- @register_node_visitor
- class CopyVisitor(NodeVisitor):
-     """
-     NOTE `torch.Tensor.copy_`'s behavior matches with `Reshape` of CIRCLE.
-     because `torch.Tensor.copy_` is a in-place operator, so `dst` is converted to `Shape` of CIRCLE.
-     after that, `dst` converted to `Shape` is connected to shape of `Reshape`.
-     `src` is connected to tensor of `Reshape`.
-     if `dst` is not converted to `Shape`.
-         [dst]   [src]
-                   |
-               [Reshape]
-     if `dst` is converted to `Shape`.
-         [dst]   [src]
-           |       |
-        [Shape]    |
-            \     /
-          [Reshape]
-     """
-
-     target: List[torch._ops.OpOverload] = [torch.ops.aten.copy.default]
-
-     def __init__(self, op_codes: Dict[OpCode, int], graph: CircleSubgraph):
-         super().__init__(op_codes, graph)
-
-     def check_to_do_broadcast(
-         self,
-         dst: List[int],
-         dst_sig: Optional[List[int]],
-         src: List[int],
-         src_sig: Optional[List[int]],
-     ) -> bool:
-         assert dst_sig is None
-         assert src_sig is None
-         return dst != src
-
-     def define_broadcast_to_node(
-         self,
-         inputs: List[Union[circle.Tensor.TensorT, torch.Tensor]],
-         outputs: List[circle.Tensor.TensorT],
-     ) -> circle.Operator.OperatorT:
-         op_index = get_op_index(
-             circle.BuiltinOperator.BuiltinOperator.BROADCAST_TO, self._op_codes
-         )
-         operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
-         operator.builtinOptionsType = (
-             circle.BuiltinOptions.BuiltinOptions.BroadcastToOptions
-         )
-
-         option = circle.BroadcastToOptions.BroadcastToOptionsT()
-         operator.builtinOptions = option
-         return operator
-
-     def define_shape_node(
-         self, inputs: List[torch.fx.Node], outputs: List[circle.Tensor.TensorT]
-     ) -> circle.Operator.OperatorT:
-         op_index = get_op_index(
-             circle.BuiltinOperator.BuiltinOperator.SHAPE, self._op_codes
-         )
-         operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
-         operator.builtinOptionsType = circle.BuiltinOptions.BuiltinOptions.ShapeOptions
-
-         option = circle.ShapeOptions.ShapeOptionsT()
-         option.outType = circle.TensorType.TensorType.INT32
-         operator.builtinOptions = option
-         return operator
-
-     def define_node(
-         self,
-         node: torch.fx.Node,
-     ) -> circle.Operator.OperatorT:
-         if len(node.args) == 3:
-             raise NotYetSupportedError("'non_blocking' is not supported yet.")
-
-         assert len(node.args) == 2, len(node.args)
-
-         args = CopyArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
-         dst = args.dst
-         src = args.src
-
-         # To connect 'dst' to Reshape node in the graph, 'dst' must be converted to Shape op.
-         dst_tensor: circle.Tensor.TensorT = self.graph.get_tensor(dst)
-         dst_shape: List[int] = dst_tensor.shape
-         dst_shape_signature: Optional[List[int]] = dst_tensor.shapeSignature
-
-         if dst_shape_signature is not None:
-             # TODO: support dynamic shape
-             raise NotYetSupportedError("Dynamic shape is not supported yet.")
-
-         dst_shape_tensor = torch.as_tensor(dst_shape, dtype=torch.int32)
-
-         dst_shape_shape = [len(dst_shape)]
-         dst_name: str = dst.name
-
-         shape_output = self.graph.add_tensor_from_scratch(
-             prefix=f"{dst_name}_shape_output",
-             shape=dst_shape_shape,
-             shape_signature=None,
-             dtype=circle.TensorType.TensorType.INT32,
-             source_node=node,
-         )
-
-         shape_operator = self.define_shape_node([dst], [shape_output])
-         self.graph.add_operator(shape_operator)
-
-         src_tensor: circle.Tensor.TensorT = self.graph.get_tensor(src)
-         src_shape: List[int] = src_tensor.shape
-         src_shape_signature: Optional[List[int]] = src_tensor.shapeSignature
-
-         if src_shape_signature is not None:
-             # TODO: support dynamic shape
-             raise NotYetSupportedError("Dynamic shape is not supported yet.")
-
-         # The src tensor must be broadcastable with the dst tensor.
-         do_broadcast = self.check_to_do_broadcast(
-             dst_shape, dst_shape_signature, src_shape, src_shape_signature
-         )
-         if do_broadcast:
-             # create braodcastTo output tensor
-             src_name: str = src.name
-             src_type: int = src_tensor.type
-
-             broadcast_to_output: circle.Tensor.TensorT = (
-                 self.graph.add_tensor_from_scratch(
-                     prefix=f"{src_name}_broadcast_to_output",
-                     shape=dst_shape,
-                     shape_signature=dst_shape_signature,
-                     dtype=src_type,
-                     source_node=node,
-                 )
-             )
-
-             broadcast_to_operator: circle.Operator.OperatorT = (
-                 self.define_broadcast_to_node(
-                     [src_tensor, dst_shape_tensor], [broadcast_to_output]
-                 )
-             )
-             self.graph.add_operator(broadcast_to_operator)
-             inputs: List = [broadcast_to_output, shape_output]
-         else:
-             inputs = [src, shape_output]
-
-         outputs = [node]
-         op_index = get_op_index(
-             circle.BuiltinOperator.BuiltinOperator.RESHAPE, self._op_codes
-         )
-
-         operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
-
-         # Op-specific option
-         operator.builtinOptionsType = (
-             circle.BuiltinOptions.BuiltinOptions.ReshapeOptions
-         )
-         option = circle.ReshapeOptions.ReshapeOptionsT()
-         option.newShape = dst_shape
-
-         operator.builtinOptions = option
-         return operator
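The docstring of the removed visitor describes the lowering: aten.copy(dst, src) broadcasts src to dst's (static) shape, so the Circle graph uses an optional BroadcastTo followed by a Reshape whose target shape comes from dst. Below is a small sketch of the equivalent tensor semantics in plain PyTorch; the tensor values are illustrative.

```python
import torch

dst = torch.zeros(2, 3)
src = torch.tensor([1.0, 2.0, 3.0])  # shape (3,), broadcastable to dst's (2, 3)

# Functional (out-of-place) form of copy_, which is what the visitor lowered.
copied = torch.ops.aten.copy.default(dst, src)

# Equivalent of the emitted Circle ops: broadcast src to dst's shape (only when
# the shapes differ), then reshape to the static shape read from dst.
manual = src.broadcast_to(dst.shape).reshape(dst.shape)

assert torch.equal(copied, manual)
```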