tico 0.1.0.dev250917__py3-none-any.whl → 0.1.0.dev250921__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tico might be problematic. Click here for more details.

Files changed (25) hide show
  1. tico/__init__.py +1 -1
  2. tico/config/v1.py +3 -0
  3. tico/experimental/quantization/algorithm/gptq/quantizer.py +2 -2
  4. tico/experimental/quantization/algorithm/smoothquant/quantizer.py +1 -1
  5. tico/experimental/quantization/config/__init__.py +1 -0
  6. tico/experimental/quantization/config/base.py +26 -0
  7. tico/experimental/quantization/config/gptq.py +29 -0
  8. tico/experimental/quantization/config/pt2e.py +25 -0
  9. tico/experimental/quantization/{config.py → config/smoothquant.py} +1 -35
  10. tico/experimental/quantization/ptq/examples/quantize_with_gptq.py +191 -70
  11. tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py +494 -0
  12. tico/experimental/quantization/ptq/wrappers/registry.py +1 -0
  13. tico/experimental/quantization/public_interface.py +1 -1
  14. tico/experimental/quantization/quantizer.py +1 -1
  15. tico/passes/convert_matmul_to_linear.py +200 -0
  16. tico/passes/convert_to_relu6.py +1 -1
  17. tico/serialize/circle_serializer.py +11 -4
  18. tico/serialize/operators/op_mm.py +15 -132
  19. tico/utils/convert.py +6 -1
  20. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/METADATA +1 -1
  21. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/RECORD +25 -19
  22. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/LICENSE +0 -0
  23. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/WHEEL +0 -0
  24. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/entry_points.txt +0 -0
  25. {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,200 @@
1
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List, Optional, TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ import torch.fx
19
+ import torch
20
+ from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
21
+ from torch.export import ExportedProgram
22
+
23
+ from tico.utils import logging
24
+ from tico.utils.graph import create_node
25
+ from tico.utils.passes import PassBase, PassResult
26
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
27
+ from tico.utils.validate_args_kwargs import MatmulArgs
28
+
29
+
30
+ class Converter: # type: ignore[empty-body]
31
+ def __init__(self):
32
+ super().__init__()
33
+
34
+ def match(self, exported_program, node) -> bool: # type: ignore[empty-body]
35
+ return False
36
+
37
+ def convert(self, exported_program, node) -> torch.fx.Node: # type: ignore[empty-body]
38
+ pass
39
+
40
+
41
+ class MatmulToLinearConverter(Converter):
42
+ def __init__(self):
43
+ super().__init__()
44
+
45
+ def convert(self, exported_program, node) -> torch.fx.Node:
46
+ graph_module = exported_program.graph_module
47
+ graph = graph_module.graph
48
+
49
+ mm_args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
50
+
51
+ lhs = mm_args.input
52
+ rhs = mm_args.other
53
+
54
+ with graph.inserting_before(node):
55
+ transpose_node = create_node(
56
+ graph,
57
+ torch.ops.aten.permute.default,
58
+ args=(rhs, [1, 0]),
59
+ )
60
+ fc_node = create_node(
61
+ graph,
62
+ torch.ops.aten.linear.default,
63
+ args=(lhs, transpose_node),
64
+ )
65
+ node.replace_all_uses_with(fc_node, propagate_meta=True)
66
+
67
+ return fc_node
68
+
69
+
70
+ class RhsConstMatmulToLinearConverter(MatmulToLinearConverter):
71
+ def __init__(self):
72
+ super().__init__()
73
+
74
+ def match(self, exported_program, node) -> bool:
75
+ if not node.target == torch.ops.aten.mm.default:
76
+ return False
77
+
78
+ mm_args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
79
+
80
+ rhs = mm_args.other
81
+ if isinstance(rhs, torch.fx.Node):
82
+ if is_lifted_tensor_constant(exported_program, rhs):
83
+ return True
84
+ elif is_param(exported_program, rhs):
85
+ return True
86
+ elif is_buffer(exported_program, rhs):
87
+ return True
88
+ else:
89
+ return False
90
+ return False
91
+
92
+ def convert(self, exported_program, node) -> torch.fx.Node:
93
+ return super().convert(exported_program, node)
94
+
95
+
96
+ class LhsConstMatmulToLinearConverter(MatmulToLinearConverter):
97
+ def __init__(self):
98
+ super().__init__()
99
+
100
+ def match(self, exported_program, node) -> bool:
101
+ if not node.target == torch.ops.aten.mm.default:
102
+ return False
103
+
104
+ mm_args = MatmulArgs(*node.args, **node.kwargs)
105
+ lhs = mm_args.input
106
+ if isinstance(lhs, torch.fx.Node):
107
+ if is_lifted_tensor_constant(exported_program, lhs):
108
+ return True
109
+ elif is_param(exported_program, lhs):
110
+ return True
111
+ elif is_buffer(exported_program, lhs):
112
+ return True
113
+ else:
114
+ return False
115
+ return False
116
+
117
+ def convert(self, exported_program, node) -> torch.fx.Node:
118
+ return super().convert(exported_program, node)
119
+
120
+
121
+ @trace_graph_diff_on_pass
122
+ class ConvertMatmulToLinear(PassBase):
123
+ """
124
+ This pass converts matmul to linear selectively
125
+
126
+ How to select between `matmul` and `linear`?
127
+
128
+ * Linear has better quantization accuracy (NPU backend)
129
+ Due to ONE compiler's quantization policy;
130
+ FullyConnected(=Linear) uses per-channel quantization for weight and per-tensor for input.
131
+ BatchMatmul(=matmul) uses per-tensor quantization for both rhs and lhs.
132
+
133
+ * Matmul to Linear requires Transpose, which may harm latency
134
+ When RHS is constant, additional transpose can be folded.
135
+
136
+ [RHS non-const case]
137
+ Constant folding cannot be performed.
138
+
139
+ lhs rhs (non-const)
140
+ | |
141
+ | transpose
142
+ | |
143
+ -- linear --
144
+ |
145
+ out
146
+
147
+ [RHS const case]
148
+ Constant folding can be performed to
149
+
150
+ lhs rhs (const) lhs rhs (folded const)
151
+ | | | |
152
+ | transpose | |
153
+ | | | |
154
+ -- linear -- --> -- linear --
155
+ | |
156
+ out out
157
+
158
+
159
+ enable_lhs_const: If true, convert matmul where LHS is constant tensor. Default is False.
160
+ enable_rhs_const: If true, convert matmul where RHS is constant tensor. Default is True.
161
+ """
162
+
163
+ def __init__(
164
+ self,
165
+ enable_lhs_const: Optional[bool] = False,
166
+ enable_rhs_const: Optional[bool] = True,
167
+ ):
168
+ super().__init__()
169
+ self.converters: List[Converter] = []
170
+ if enable_lhs_const:
171
+ self.converters.append(LhsConstMatmulToLinearConverter())
172
+ if enable_rhs_const:
173
+ self.converters.append(RhsConstMatmulToLinearConverter())
174
+
175
+ def call(self, exported_program: ExportedProgram) -> PassResult:
176
+ logger = logging.getLogger(__name__)
177
+
178
+ graph_module = exported_program.graph_module
179
+ graph = graph_module.graph
180
+ modified = False
181
+ for node in graph.nodes:
182
+ if not node.op == "call_function":
183
+ continue
184
+
185
+ for converter in self.converters:
186
+ if not converter.match(exported_program, node):
187
+ continue
188
+
189
+ new_node = converter.convert(exported_program, node)
190
+ modified = True
191
+ logger.debug(
192
+ f"{node.name} is replaced with {new_node.name} operator (permute + linear)"
193
+ )
194
+ continue
195
+
196
+ graph.eliminate_dead_code()
197
+ graph.lint()
198
+ graph_module.recompile()
199
+
200
+ return PassResult(modified)
@@ -172,7 +172,7 @@ class ConvertToReLU6(PassBase):
172
172
  converter.convert(exported_program, node)
173
173
  modified = True
174
174
  logger.debug(f"{node.name} is replaced with ReLU6 operator")
175
- break
175
+ continue
176
176
 
177
177
  graph.eliminate_dead_code()
178
178
  graph.lint()
@@ -20,6 +20,7 @@ import torch
20
20
  from circle_schema import circle
21
21
  from torch.export.exported_program import ConstantArgument, ExportedProgram, InputKind
22
22
 
23
+ from tico.config import CompileConfigBase, get_default_config
23
24
  from tico.serialize.circle_mapping import to_circle_dtype, to_circle_shape
24
25
  from tico.serialize.operators import *
25
26
  from tico.serialize.circle_graph import CircleModel, CircleSubgraph
@@ -47,7 +48,9 @@ def _initialize_model() -> tuple[CircleModel, CircleSubgraph]:
47
48
  return model, graph
48
49
 
49
50
 
50
- def build_circle(ep: ExportedProgram) -> bytes:
51
+ def build_circle(
52
+ ep: ExportedProgram, config: CompileConfigBase = get_default_config()
53
+ ) -> bytes:
51
54
  """Convert ExportedProgram to Circle format.
52
55
 
53
56
  Args:
@@ -68,9 +71,13 @@ def build_circle(ep: ExportedProgram) -> bytes:
68
71
  for in_spec in ep.graph_signature.input_specs:
69
72
  if in_spec.kind != InputKind.USER_INPUT:
70
73
  continue
71
- # NoneType ConstantArgument is ignored.
72
- if isinstance(in_spec.arg, ConstantArgument) and in_spec.arg.value == None:
73
- continue
74
+ if isinstance(in_spec.arg, ConstantArgument):
75
+ # Every ConstantArgument input is skipped when the "remove_constant_input" option is set
76
+ if config.get("remove_constant_input"):
77
+ continue
78
+ # NoneType ConstantArgument is ignored.
79
+ if in_spec.arg.value == None:
80
+ continue
74
81
  arg_name = in_spec.arg.name
75
82
  graph.add_input(arg_name)
76
83
  logger.debug(f"Registered input: {arg_name}")
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
20
20
  import torch
21
21
  from circle_schema import circle
22
22
 
23
- from tico.serialize.circle_graph import CircleSubgraph, is_const
23
+ from tico.serialize.circle_graph import CircleSubgraph
24
24
  from tico.serialize.operators.hashable_opcode import OpCode
25
25
  from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
26
26
  from tico.serialize.operators.utils import create_builtin_operator, get_op_index
@@ -28,9 +28,9 @@ from tico.utils.validate_args_kwargs import MatmulArgs
28
28
 
29
29
 
30
30
  @register_node_visitor
31
- class MatmulDefaultVisitor(NodeVisitor):
31
+ class MatmulVisitor(NodeVisitor):
32
32
  """
33
- Convert matmul to equavalent BatchMatMul or FullyConnected with Transpose.
33
+ Convert matmul to Circle BatchMatMul
34
34
  """
35
35
 
36
36
  target: List[torch._ops.OpOverload] = [torch.ops.aten.mm.default]
@@ -38,131 +38,7 @@ class MatmulDefaultVisitor(NodeVisitor):
38
38
  def __init__(self, op_codes: Dict[OpCode, int], graph: CircleSubgraph):
39
39
  super().__init__(op_codes, graph)
40
40
 
41
- # NOTE: Matmul is equivalent to Batch MatMul (batch=1)
42
- def define_bmm_node(self, inputs, outputs) -> circle.Operator.OperatorT:
43
- def set_bmm_option(operator):
44
- operator.builtinOptionsType = (
45
- circle.BuiltinOptions.BuiltinOptions.BatchMatMulOptions
46
- )
47
- option = circle.BatchMatMulOptions.BatchMatMulOptionsT()
48
- option.adjointLhs, option.adjointRhs = False, False
49
- option.asymmetricQuantizeInputs = False
50
- operator.builtinOptions = option
51
-
52
- op_index = get_op_index(
53
- circle.BuiltinOperator.BuiltinOperator.BATCH_MATMUL, self._op_codes
54
- )
55
- operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
56
- set_bmm_option(operator)
57
-
58
- return operator
59
-
60
- def define_transpose_node(self, inputs, outputs) -> circle.Operator.OperatorT:
61
- def set_transpose_option(operator):
62
- operator.builtinOptionsType = (
63
- circle.BuiltinOptions.BuiltinOptions.TransposeOptions
64
- )
65
- option = circle.TransposeOptions.TransposeOptionsT()
66
- operator.builtinOptions = option
67
-
68
- transpose_op_index = get_op_index(
69
- circle.BuiltinOperator.BuiltinOperator.TRANSPOSE, self._op_codes
70
- )
71
- operator = create_builtin_operator(
72
- self.graph, transpose_op_index, inputs, outputs
73
- )
74
- set_transpose_option(operator)
75
- return operator
76
-
77
- def define_fc_node(self, inputs, outputs) -> circle.Operator.OperatorT:
78
- def set_fc_option(operator):
79
- operator.builtinOptionsType = (
80
- circle.BuiltinOptions.BuiltinOptions.FullyConnectedOptions
81
- )
82
- option = circle.FullyConnectedOptions.FullyConnectedOptionsT()
83
-
84
- option.fusedActivationFunction = (
85
- circle.ActivationFunctionType.ActivationFunctionType.NONE
86
- )
87
- option.weightsFormat = (
88
- circle.FullyConnectedOptionsWeightsFormat.FullyConnectedOptionsWeightsFormat.DEFAULT
89
- )
90
- option.keepNumDims = False
91
- option.asymmetricQuantizeInputs = False
92
- option.quantizedBiasType = circle.TensorType.TensorType.FLOAT32
93
-
94
- operator.builtinOptions = option
95
-
96
- fc_op_index = get_op_index(
97
- circle.BuiltinOperator.BuiltinOperator.FULLY_CONNECTED, self._op_codes
98
- )
99
- operator = create_builtin_operator(self.graph, fc_op_index, inputs, outputs)
100
- set_fc_option(operator)
101
- return operator
102
-
103
- """
104
- Define FullyConnnected with Tranpose operator.
105
- Note that those sets of operators are equivalent.
106
- (1) Matmul
107
- matmul( lhs[H, K], rhs[K, W'] ) -> output(H, W')
108
-
109
- (2) Transpose + FullyConneccted
110
- transpose( rhs[K, W'] ) -> trs_output[W', K]
111
- fullyconnected( lhs[H, K], trs_output[W', K] ) -> output(H, W')
112
- """
113
-
114
- def define_fc_with_transpose(
115
- self, node, inputs, outputs
116
- ) -> circle.Operator.OperatorT:
117
- lhs, rhs = inputs
118
-
119
- # get transpose shape
120
- rhs_tid: int = self.graph.get_tid_registered(rhs)
121
- rhs_tensor: circle.Tensor.TensorT = self.graph.tensors[rhs_tid]
122
- rhs_name: str = rhs.name
123
- rhs_type: int = rhs_tensor.type
124
- rhs_shape: List[int] = rhs_tensor.shape
125
- assert len(rhs_shape) == 2, len(rhs_shape)
126
- rhs_shape_transpose = [rhs_shape[1], rhs_shape[0]]
127
-
128
- # create transpose output tensor
129
- trs_output = self.graph.add_tensor_from_scratch(
130
- prefix=f"{rhs_name}_transposed_output",
131
- shape=rhs_shape_transpose,
132
- shape_signature=None,
133
- dtype=rhs_type,
134
- source_node=node,
135
- )
136
- trs_perm = self.graph.add_const_tensor(data=[1, 0], source_node=node)
137
- trs_operator = self.define_transpose_node([rhs, trs_perm], [trs_output])
138
- self.graph.add_operator(trs_operator)
139
-
140
- # define fc node
141
- fc_input = lhs
142
- fc_weight = trs_output
143
- fc_shape = [fc_weight.shape[0]]
144
- fc_bias = self.graph.add_const_tensor(
145
- data=[0.0] * fc_shape[0], source_node=node
146
- )
147
-
148
- operator = self.define_fc_node([fc_input, fc_weight, fc_bias], outputs)
149
-
150
- return operator
151
-
152
- def define_node(
153
- self, node: torch.fx.Node, prior_latency=True
154
- ) -> circle.Operator.OperatorT:
155
- """
156
- NOTE: Possibility of accuracy-latency trade-off
157
- From ONE compiler's perspective:
158
- - BMM uses per-tensor quantization for both rhs and lhs.
159
- - FC uses per-channel quantization for weight and per-tensor for input.
160
- Thus, FC is better in terms of accuracy.
161
- FC necessarily involves an additional transpose operation to be identical with mm.
162
- If transposed operand is const, it can be optimized by constant folding.
163
- Thus, convert FC only if tranpose can be folded.
164
- TODO set prior_latency outside
165
- """
41
+ def define_node(self, node: torch.fx.Node) -> circle.Operator.OperatorT:
166
42
  args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
167
43
  input = args.input
168
44
  other = args.other
@@ -170,9 +46,16 @@ class MatmulDefaultVisitor(NodeVisitor):
170
46
  inputs = [input, other]
171
47
  outputs = [node]
172
48
 
173
- if not is_const(other) and prior_latency:
174
- operator = self.define_bmm_node(inputs, outputs)
175
- else:
176
- operator = self.define_fc_with_transpose(node, inputs, outputs)
49
+ op_index = get_op_index(
50
+ circle.BuiltinOperator.BuiltinOperator.BATCH_MATMUL, self._op_codes
51
+ )
52
+ operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
53
+ operator.builtinOptionsType = (
54
+ circle.BuiltinOptions.BuiltinOptions.BatchMatMulOptions
55
+ )
56
+ option = circle.BatchMatMulOptions.BatchMatMulOptionsT()
57
+ option.adjointLhs, option.adjointRhs = False, False
58
+ option.asymmetricQuantizeInputs = False
59
+ operator.builtinOptions = option
177
60
 
178
61
  return operator
tico/utils/convert.py CHANGED
@@ -40,6 +40,7 @@ from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
40
40
  from tico.passes.const_prop_pass import ConstPropPass
41
41
  from tico.passes.convert_conv1d_to_conv2d import ConvertConv1dToConv2d
42
42
  from tico.passes.convert_layout_op_to_reshape import ConvertLayoutOpToReshape
43
+ from tico.passes.convert_matmul_to_linear import ConvertMatmulToLinear
43
44
  from tico.passes.convert_repeat_to_expand_copy import ConvertRepeatToExpandCopy
44
45
  from tico.passes.convert_to_relu6 import ConvertToReLU6
45
46
  from tico.passes.decompose_addmm import DecomposeAddmm
@@ -249,6 +250,10 @@ def convert_exported_module_to_circle(
249
250
  ConstPropPass(),
250
251
  SegmentIndexSelectConst(),
251
252
  LegalizeCausalMaskValue(enabled=config.get("legalize_causal_mask_value")),
253
+ ConvertMatmulToLinear(
254
+ enable_lhs_const=config.get("convert_lhs_const_mm_to_fc"),
255
+ enable_rhs_const=config.get("convert_rhs_const_mm_to_fc"),
256
+ ),
252
257
  LowerToResizeNearestNeighbor(),
253
258
  LegalizePreDefinedLayoutOperators(),
254
259
  LowerPow2ToMul(),
@@ -287,7 +292,7 @@ def convert_exported_module_to_circle(
287
292
 
288
293
  check_unsupported_target(exported_program)
289
294
  check_training_ops(exported_program)
290
- circle_program = build_circle(exported_program)
295
+ circle_program = build_circle(exported_program, config)
291
296
 
292
297
  return circle_program
293
298
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tico
3
- Version: 0.1.0.dev250917
3
+ Version: 0.1.0.dev250921
4
4
  Summary: Convert exported Torch module to circle
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -1,19 +1,18 @@
1
- tico/__init__.py,sha256=Da7Ln6MuWCBJXrjts6OsAslWSS79toVgPG2PITYPzE0,1883
1
+ tico/__init__.py,sha256=SJrnDNsVJlIf-r1ZVzi2Kj_xI68YVwAjm83FWgbiWLE,1883
2
2
  tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
3
3
  tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
4
4
  tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
5
5
  tico/config/factory.py,sha256=il0zqB6Lm5NX2LnG-TUhmiP9vVeZ_3TucJMorVZIodY,1324
6
- tico/config/v1.py,sha256=O1jzpUBDwoWpLohEpI08pJNwVB-yz3ufPrQm2_XWq4Y,1108
6
+ tico/config/v1.py,sha256=AVgOck-HxR1R1FZPVjtN5J82hPLJvUxwzbnyWXIQZWE,1237
7
7
  tico/experimental/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
8
8
  tico/experimental/quantization/__init__.py,sha256=IaJPZegVJp0P3luutBo907Kp5sOJensE1Mm-XBG_jBs,122
9
- tico/experimental/quantization/config.py,sha256=nMepa_H471t7f3bKMvR8cZUZgruy_8kdb147rBkTWCQ,2004
10
- tico/experimental/quantization/public_interface.py,sha256=4-v9VXsokRG2-UUYYHd_MlbHxChqdGI5iuySyYDY_Pw,4420
11
- tico/experimental/quantization/quantizer.py,sha256=_2pDtWFKDCuKfYF2bptOwIYsa0VFNFM1ZNgi8_OGvHM,2365
9
+ tico/experimental/quantization/public_interface.py,sha256=y-iwaeuedBvHwTh5hflQg4u2ZCdqf46IlTl9ntHq8pU,4425
10
+ tico/experimental/quantization/quantizer.py,sha256=pDTQGzR-BcQJeGZ7O4cXRQdCme4q_POpxHetwnv0bYg,2370
12
11
  tico/experimental/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
13
12
  tico/experimental/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
14
13
  tico/experimental/quantization/algorithm/gptq/gptq.py,sha256=Qn9b_2ki7B64DcVEY25NMkww3PdZ5EqYQQXfYhNDQ6I,5555
15
14
  tico/experimental/quantization/algorithm/gptq/quant.py,sha256=Rl4wAOCmlE0U09BtNCDbccaSNohRHCNLwFi3zCqZfNo,5127
16
- tico/experimental/quantization/algorithm/gptq/quantizer.py,sha256=_ZnSD_LBag_FVcVEniPKBmw7bNZ2iZLZ8aZnexnCgrs,11693
15
+ tico/experimental/quantization/algorithm/gptq/quantizer.py,sha256=ZKeQQWm6eMUyRgntQxVR-QVjxJOc2pW4Dc_mrEPZA64,11686
17
16
  tico/experimental/quantization/algorithm/gptq/utils.py,sha256=leGKayf-xbSjVwwAGTA5RsxUKrhDiklOQdlsLifjdrs,1811
18
17
  tico/experimental/quantization/algorithm/pt2e/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
19
18
  tico/experimental/quantization/algorithm/pt2e/quantizer.py,sha256=mdTvsG87bo8fu0GaWqSM8iBCs-4f4EfUlVtk-Ko6M34,2546
@@ -38,8 +37,13 @@ tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py,sha256=
38
37
  tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py,sha256=Idtoya2RcGKlgUJgC9WqNz0jH3gf6ViuPmsD9ySHbls,2253
39
38
  tico/experimental/quantization/algorithm/smoothquant/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
40
39
  tico/experimental/quantization/algorithm/smoothquant/observer.py,sha256=OWBKQ3ox6PqeqgevxOjpXvb7uApoqE4YbUBelGhVSN8,3435
41
- tico/experimental/quantization/algorithm/smoothquant/quantizer.py,sha256=QuZBi24L-LYI26nwZd6JmTdokxr6-l_vIgZvWVdqx_o,3637
40
+ tico/experimental/quantization/algorithm/smoothquant/quantizer.py,sha256=14-QrKAW-Rw6pIbbNaD5eORcH2fqi40-TNFGaWVakIg,3649
42
41
  tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py,sha256=fxCy4m-BsSjraciSVPFlPhgsOT46RjrOgczQGb7B9TA,11561
42
+ tico/experimental/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
43
+ tico/experimental/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
44
+ tico/experimental/quantization/config/gptq.py,sha256=IUIEz5bLhsTXqoBCE1rfPec99zsRjwgpDbPW5YJqOPg,973
45
+ tico/experimental/quantization/config/pt2e.py,sha256=9HCrraTGGZeKEN9puKV-ODi7ncV2Wjc3oe_JCO1D_Rs,850
46
+ tico/experimental/quantization/config/smoothquant.py,sha256=fcyhu3YlOTM7fDW9lGTXh-uJOUD6CeykZj7AMCNVbak,1415
43
47
  tico/experimental/quantization/evaluation/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
44
48
  tico/experimental/quantization/evaluation/backend.py,sha256=CZL9rZOA0t8cH7PHp6u9l7dGqWNvTj9bKOvwo0PVul0,692
45
49
  tico/experimental/quantization/evaluation/evaluate.py,sha256=kfa_GvFaX6DoSTAmuCImMJqF2jgqtnor5UpC7wVmGPI,7877
@@ -68,7 +72,7 @@ tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYga
68
72
  tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
69
73
  tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
70
74
  tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=N1qZQgt1S-xZrdv-PW7OfXEcv0gsO2q9faOF4aD-zKo,4147
71
- tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=w21Qao5_6SnWMuxmnZbZOoqaLQOuSnK52mHin4aedtA,6979
75
+ tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=y-SK56j4wL-9j-0jtuOqQUq4CElZtGOETp-Tg4XivUI,10438
72
76
  tico/experimental/quantization/ptq/observers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
73
77
  tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
74
78
  tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
@@ -84,8 +88,9 @@ tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL
84
88
  tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
85
89
  tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=LhEoobfvto6zKrBOKL4gmxfFFc31jHzyQV_zfps-iQM,3604
86
90
  tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=vkcDos_knGSS29rIZuEIWkAJLHrENbGz8nCH2-iara8,5969
87
- tico/experimental/quantization/ptq/wrappers/registry.py,sha256=GlVBPWPAnLRqTtemu_YOEX9WisF1eN6Mud7y1zzvpW0,5092
91
+ tico/experimental/quantization/ptq/wrappers/registry.py,sha256=OVO5nev6J8Br9zsIX-Ut7ZgWzA9f_jk0Np9bGioXgQM,5171
88
92
  tico/experimental/quantization/ptq/wrappers/fairseq/__init__.py,sha256=Mc8FLd9DusyB_IT1vk1OYrRkngOYnYd05IvtA9ORVQc,160
93
+ tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py,sha256=JT79shxOhDtRFgm8jrrN6HKvyVotiytLjMjAxX-Cztg,20416
89
94
  tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder.py,sha256=r9DPUAbL2KRJ8zpMJ39Y9n6Oe79nte-mFcdjG2qEP-w,13809
90
95
  tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder_layer.py,sha256=aGr80Ku75j2H-UZ0elEa0mOQEyaAs2YJ4WJCN0lonn0,6412
91
96
  tico/experimental/quantization/ptq/wrappers/fairseq/quant_mha.py,sha256=HsigmOLeacLXc46QNeFqwQ0DwKQhNrtWTKEtLJoqXoc,15562
@@ -107,8 +112,9 @@ tico/passes/cast_mixed_type_args.py,sha256=Wd3sCDKJZwdb8GiMWKljm8X5CLFRd8eCz-dmW
107
112
  tico/passes/const_prop_pass.py,sha256=hDxGgJNiRjsgOArdaoeAOcOOA-nKBvA1W1zcMZQA5yg,11531
108
113
  tico/passes/convert_conv1d_to_conv2d.py,sha256=ktS3h158y9rg1sQiW8BZZbflV_dk_UdjBPQnuiOKyzg,5303
109
114
  tico/passes/convert_layout_op_to_reshape.py,sha256=sCAFjkmVtiKjvDQSAgnjNBHl3_hWXJZElGDXQiTH-7s,2963
115
+ tico/passes/convert_matmul_to_linear.py,sha256=Y_Me8YqrNumfMrB08WT4wwAoKIfKNak5y8Y10ekWe5s,6611
110
116
  tico/passes/convert_repeat_to_expand_copy.py,sha256=JbtFTmWyfJS2SSd_higP1IEhQeh7wHdN5dmTbbiFVCs,3237
111
- tico/passes/convert_to_relu6.py,sha256=1BJpUwUb6Zli_1y3eyJQo7dg9B1xvZ7sYjMbvEQsFJM,6442
117
+ tico/passes/convert_to_relu6.py,sha256=9B6OLyF72tMvD-ugV7aBx6l1szwERufNBUaX34pkZ4c,6445
112
118
  tico/passes/decompose_addmm.py,sha256=KjnpZjSuA0uvNmKaTN_EMwobcOi3CAB81buORzTDxro,3979
113
119
  tico/passes/decompose_batch_norm.py,sha256=06LAxhSmpTxFZJmUelwB3I_GipNWrLoM7PfM6ZkxOZY,6512
114
120
  tico/passes/decompose_fake_quantize.py,sha256=736srs8SM8K_mLR0WG10LVMMLRkYkBM9OF0k1GCkAW0,5218
@@ -139,7 +145,7 @@ tico/passes/segment_index_select.py,sha256=VVCKNLtYRkr9n5lGnlzEuQsQ0WVxEYXGchFrD
139
145
  tico/serialize/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
140
146
  tico/serialize/circle_graph.py,sha256=qvyul_HULoz7B_6RFKQ8s9RjEvMgPq-ynMVkZe8aqE4,12034
141
147
  tico/serialize/circle_mapping.py,sha256=c__AIHPi23lPugNJFolgMAKrw8j7gEeMaUQ1LAMSFnY,8542
142
- tico/serialize/circle_serializer.py,sha256=BGK9tltKkoL1h4rcrJUgDJIGlHst7aF3cZAKJk_GPWc,10950
148
+ tico/serialize/circle_serializer.py,sha256=tw2xwm8tRjaFzZdaaS8Fa8Jfqz0r7Gn8L6D66m0QA0g,11228
143
149
  tico/serialize/pack.py,sha256=5HZ9kX3x6C6CyT_FWS6FRmvx_P7Dx21orjUNQxJ2xlo,1297
144
150
  tico/serialize/quant_param.py,sha256=6nbGKdqwMI9Cx9BLXJ9A9JU4qb770S8vTM1vCZRX3Eo,1342
145
151
  tico/serialize/operators/__init__.py,sha256=LIvXsNnN4yUCS2CGNQ5XW8p8oXDTV_WHWuOEAw1t6WY,990
@@ -190,7 +196,7 @@ tico/serialize/operators/op_max_pool2d_with_indices.py,sha256=i4iKZ262ytDKUt7bG9
190
196
  tico/serialize/operators/op_maximum.py,sha256=JjBr6gWEnuakLuk1_feotTHfIIm3s5YqWmqhUMpSPI0,1873
191
197
  tico/serialize/operators/op_mean.py,sha256=rVQZOxCJkHFY4kQBAS1HVK0HkcqxgkSy6zvEDLX_WYQ,2267
192
198
  tico/serialize/operators/op_minimum.py,sha256=fASjQVcTPCin02umQwFPdq2ss-Ve7S5A33J3QmmQ_wQ,1873
193
- tico/serialize/operators/op_mm.py,sha256=XcH15gjbP5aAl9rBKFQsVvN2GE4127zNH6_0v81_ExA,6855
199
+ tico/serialize/operators/op_mm.py,sha256=VJJRLLYn9zAMcR2rsb86o809edyRJ7CW31waAL0ZXeI,2244
194
200
  tico/serialize/operators/op_mul.py,sha256=si_VdYNyFbULb50SnXHOINh0dZQ2PhRB6Fzl54ZBj5Y,3049
195
201
  tico/serialize/operators/op_ne.py,sha256=xa2WJL2tYksxw7fIJic_D9ltLEseyCII8HpR32Oq8Do,1900
196
202
  tico/serialize/operators/op_neg.py,sha256=fkI3ExyD3QF-qtxBcXqQutPNDbNL8g7lZYE7CyD2wLk,2046
@@ -228,7 +234,7 @@ tico/serialize/operators/utils.py,sha256=lXGpEJW1h8U_-gfc6EWjvvSiq3yJ9P-v1v3EMRT
228
234
  tico/serialize/operators/adapters/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
229
235
  tico/serialize/operators/adapters/llama_rmsnorm.py,sha256=6t3dhfNpR03eIjsmhymF2JKd6lCf7PvInqMf77c_BOE,1139
230
236
  tico/utils/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
231
- tico/utils/convert.py,sha256=GgZwZtiqFzTdszfUQO0vcX39lKjs97gYwZ-Tiw_4Bbo,13222
237
+ tico/utils/convert.py,sha256=bgk-a_gdRrrcDFFQHS_ElPdzORmfAZAgNendfzEpHOk,13501
232
238
  tico/utils/define.py,sha256=Ypgp7YffM4pgPl4Zh6TmogSn1OxGBMRw_e09qYGflZk,1467
233
239
  tico/utils/diff_graph.py,sha256=_eDGGPDPYQD4b--MXX0DLoVgSt_wLfNPt47UlolLLR4,5272
234
240
  tico/utils/dtype.py,sha256=L5Qb7qgbt0eQ5frUTvHYrRtTJb1dg4-JNEopcxCNg1U,1389
@@ -252,9 +258,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
252
258
  tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
253
259
  tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
254
260
  tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
255
- tico-0.1.0.dev250917.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
256
- tico-0.1.0.dev250917.dist-info/METADATA,sha256=WJdcwQ8suuOhdWCv9cW8_RW_qyckaOM5jEzlvi00vbM,8450
257
- tico-0.1.0.dev250917.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
258
- tico-0.1.0.dev250917.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
259
- tico-0.1.0.dev250917.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
260
- tico-0.1.0.dev250917.dist-info/RECORD,,
261
+ tico-0.1.0.dev250921.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
262
+ tico-0.1.0.dev250921.dist-info/METADATA,sha256=PKokhTsAtNxesEROg_vhfa6pIcl8WyFzlx-5H7RBcGk,8450
263
+ tico-0.1.0.dev250921.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
264
+ tico-0.1.0.dev250921.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
265
+ tico-0.1.0.dev250921.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
266
+ tico-0.1.0.dev250921.dist-info/RECORD,,