tico 0.1.0.dev250917__py3-none-any.whl → 0.1.0.dev250921__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tico might be problematic. Click here for more details.
- tico/__init__.py +1 -1
- tico/config/v1.py +3 -0
- tico/experimental/quantization/algorithm/gptq/quantizer.py +2 -2
- tico/experimental/quantization/algorithm/smoothquant/quantizer.py +1 -1
- tico/experimental/quantization/config/__init__.py +1 -0
- tico/experimental/quantization/config/base.py +26 -0
- tico/experimental/quantization/config/gptq.py +29 -0
- tico/experimental/quantization/config/pt2e.py +25 -0
- tico/experimental/quantization/{config.py → config/smoothquant.py} +1 -35
- tico/experimental/quantization/ptq/examples/quantize_with_gptq.py +191 -70
- tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py +494 -0
- tico/experimental/quantization/ptq/wrappers/registry.py +1 -0
- tico/experimental/quantization/public_interface.py +1 -1
- tico/experimental/quantization/quantizer.py +1 -1
- tico/passes/convert_matmul_to_linear.py +200 -0
- tico/passes/convert_to_relu6.py +1 -1
- tico/serialize/circle_serializer.py +11 -4
- tico/serialize/operators/op_mm.py +15 -132
- tico/utils/convert.py +6 -1
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/METADATA +1 -1
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/RECORD +25 -19
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/LICENSE +0 -0
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/WHEEL +0 -0
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/entry_points.txt +0 -0
- {tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/top_level.txt +0 -0
tico/passes/convert_matmul_to_linear.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import List, Optional, TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import torch.fx
|
|
19
|
+
import torch
|
|
20
|
+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
|
|
21
|
+
from torch.export import ExportedProgram
|
|
22
|
+
|
|
23
|
+
from tico.utils import logging
|
|
24
|
+
from tico.utils.graph import create_node
|
|
25
|
+
from tico.utils.passes import PassBase, PassResult
|
|
26
|
+
from tico.utils.trace_decorators import trace_graph_diff_on_pass
|
|
27
|
+
from tico.utils.validate_args_kwargs import MatmulArgs
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Converter: # type: ignore[empty-body]
|
|
31
|
+
def __init__(self):
|
|
32
|
+
super().__init__()
|
|
33
|
+
|
|
34
|
+
def match(self, exported_program, node) -> bool: # type: ignore[empty-body]
|
|
35
|
+
return False
|
|
36
|
+
|
|
37
|
+
def convert(self, exported_program, node) -> torch.fx.Node: # type: ignore[empty-body]
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class MatmulToLinearConverter(Converter):
|
|
42
|
+
def __init__(self):
|
|
43
|
+
super().__init__()
|
|
44
|
+
|
|
45
|
+
def convert(self, exported_program, node) -> torch.fx.Node:
|
|
46
|
+
graph_module = exported_program.graph_module
|
|
47
|
+
graph = graph_module.graph
|
|
48
|
+
|
|
49
|
+
mm_args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
|
|
50
|
+
|
|
51
|
+
lhs = mm_args.input
|
|
52
|
+
rhs = mm_args.other
|
|
53
|
+
|
|
54
|
+
with graph.inserting_before(node):
|
|
55
|
+
transpose_node = create_node(
|
|
56
|
+
graph,
|
|
57
|
+
torch.ops.aten.permute.default,
|
|
58
|
+
args=(rhs, [1, 0]),
|
|
59
|
+
)
|
|
60
|
+
fc_node = create_node(
|
|
61
|
+
graph,
|
|
62
|
+
torch.ops.aten.linear.default,
|
|
63
|
+
args=(lhs, transpose_node),
|
|
64
|
+
)
|
|
65
|
+
node.replace_all_uses_with(fc_node, propagate_meta=True)
|
|
66
|
+
|
|
67
|
+
return fc_node
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class RhsConstMatmulToLinearConverter(MatmulToLinearConverter):
|
|
71
|
+
def __init__(self):
|
|
72
|
+
super().__init__()
|
|
73
|
+
|
|
74
|
+
def match(self, exported_program, node) -> bool:
|
|
75
|
+
if not node.target == torch.ops.aten.mm.default:
|
|
76
|
+
return False
|
|
77
|
+
|
|
78
|
+
mm_args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
|
|
79
|
+
|
|
80
|
+
rhs = mm_args.other
|
|
81
|
+
if isinstance(rhs, torch.fx.Node):
|
|
82
|
+
if is_lifted_tensor_constant(exported_program, rhs):
|
|
83
|
+
return True
|
|
84
|
+
elif is_param(exported_program, rhs):
|
|
85
|
+
return True
|
|
86
|
+
elif is_buffer(exported_program, rhs):
|
|
87
|
+
return True
|
|
88
|
+
else:
|
|
89
|
+
return False
|
|
90
|
+
return False
|
|
91
|
+
|
|
92
|
+
def convert(self, exported_program, node) -> torch.fx.Node:
|
|
93
|
+
return super().convert(exported_program, node)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class LhsConstMatmulToLinearConverter(MatmulToLinearConverter):
|
|
97
|
+
def __init__(self):
|
|
98
|
+
super().__init__()
|
|
99
|
+
|
|
100
|
+
def match(self, exported_program, node) -> bool:
|
|
101
|
+
if not node.target == torch.ops.aten.mm.default:
|
|
102
|
+
return False
|
|
103
|
+
|
|
104
|
+
mm_args = MatmulArgs(*node.args, **node.kwargs)
|
|
105
|
+
lhs = mm_args.input
|
|
106
|
+
if isinstance(lhs, torch.fx.Node):
|
|
107
|
+
if is_lifted_tensor_constant(exported_program, lhs):
|
|
108
|
+
return True
|
|
109
|
+
elif is_param(exported_program, lhs):
|
|
110
|
+
return True
|
|
111
|
+
elif is_buffer(exported_program, lhs):
|
|
112
|
+
return True
|
|
113
|
+
else:
|
|
114
|
+
return False
|
|
115
|
+
return False
|
|
116
|
+
|
|
117
|
+
def convert(self, exported_program, node) -> torch.fx.Node:
|
|
118
|
+
return super().convert(exported_program, node)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@trace_graph_diff_on_pass
|
|
122
|
+
class ConvertMatmulToLinear(PassBase):
|
|
123
|
+
"""
|
|
124
|
+
This pass converts matmul to linear selectively
|
|
125
|
+
|
|
126
|
+
How to select between `matmul` and `linear`?
|
|
127
|
+
|
|
128
|
+
* Linear has better quantization accuracy (NPU backend)
|
|
129
|
+
Due to ONE compiler's quantization policy;
|
|
130
|
+
FullyConnected(=Linear) uses per-channel quantization for weight and per-tensor for input.
|
|
131
|
+
BatchMatmul(=matmul) uses per-tensor quantization for both rhs and lhs.
|
|
132
|
+
|
|
133
|
+
* Matmul to Linear requires Transpose, which may harm latency
|
|
134
|
+
When RHS is constant, the additional transpose can be folded.
|
|
135
|
+
|
|
136
|
+
[RHS non-const case]
|
|
137
|
+
Constant folding cannot be performed.
|
|
138
|
+
|
|
139
|
+
lhs rhs (non-const)
|
|
140
|
+
| |
|
|
141
|
+
| transpose
|
|
142
|
+
| |
|
|
143
|
+
-- linear --
|
|
144
|
+
|
|
|
145
|
+
out
|
|
146
|
+
|
|
147
|
+
[RHS const case]
|
|
148
|
+
Constant folding can be performed as shown below.
|
|
149
|
+
|
|
150
|
+
lhs rhs (const) lhs rhs (folded const)
|
|
151
|
+
| | | |
|
|
152
|
+
| transpose | |
|
|
153
|
+
| | | |
|
|
154
|
+
-- linear -- --> -- linear --
|
|
155
|
+
| |
|
|
156
|
+
out out
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
enable_lhs_const: If true, convert matmul where LHS is constant tensor. Default is False.
|
|
160
|
+
enable_rhs_const: If true, convert matmul where RHS is constant tensor. Default is True.
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
def __init__(
|
|
164
|
+
self,
|
|
165
|
+
enable_lhs_const: Optional[bool] = False,
|
|
166
|
+
enable_rhs_const: Optional[bool] = True,
|
|
167
|
+
):
|
|
168
|
+
super().__init__()
|
|
169
|
+
self.converters: List[Converter] = []
|
|
170
|
+
if enable_lhs_const:
|
|
171
|
+
self.converters.append(LhsConstMatmulToLinearConverter())
|
|
172
|
+
if enable_rhs_const:
|
|
173
|
+
self.converters.append(RhsConstMatmulToLinearConverter())
|
|
174
|
+
|
|
175
|
+
def call(self, exported_program: ExportedProgram) -> PassResult:
|
|
176
|
+
logger = logging.getLogger(__name__)
|
|
177
|
+
|
|
178
|
+
graph_module = exported_program.graph_module
|
|
179
|
+
graph = graph_module.graph
|
|
180
|
+
modified = False
|
|
181
|
+
for node in graph.nodes:
|
|
182
|
+
if not node.op == "call_function":
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
for converter in self.converters:
|
|
186
|
+
if not converter.match(exported_program, node):
|
|
187
|
+
continue
|
|
188
|
+
|
|
189
|
+
new_node = converter.convert(exported_program, node)
|
|
190
|
+
modified = True
|
|
191
|
+
logger.debug(
|
|
192
|
+
f"{node.name} is replaced with {new_node.name} operator (permute + linear)"
|
|
193
|
+
)
|
|
194
|
+
continue
|
|
195
|
+
|
|
196
|
+
graph.eliminate_dead_code()
|
|
197
|
+
graph.lint()
|
|
198
|
+
graph_module.recompile()
|
|
199
|
+
|
|
200
|
+
return PassResult(modified)
|
tico/passes/convert_to_relu6.py
CHANGED
[hunk omitted in this extract]
tico/serialize/circle_serializer.py
CHANGED
|
@@ -20,6 +20,7 @@ import torch
|
|
|
20
20
|
from circle_schema import circle
|
|
21
21
|
from torch.export.exported_program import ConstantArgument, ExportedProgram, InputKind
|
|
22
22
|
|
|
23
|
+
from tico.config import CompileConfigBase, get_default_config
|
|
23
24
|
from tico.serialize.circle_mapping import to_circle_dtype, to_circle_shape
|
|
24
25
|
from tico.serialize.operators import *
|
|
25
26
|
from tico.serialize.circle_graph import CircleModel, CircleSubgraph
|
|
@@ -47,7 +48,9 @@ def _initialize_model() -> tuple[CircleModel, CircleSubgraph]:
|
|
|
47
48
|
return model, graph
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
def build_circle(ep: ExportedProgram) -> bytes:
|
|
51
|
+
def build_circle(
|
|
52
|
+
ep: ExportedProgram, config: CompileConfigBase = get_default_config()
|
|
53
|
+
) -> bytes:
|
|
51
54
|
"""Convert ExportedProgram to Circle format.
|
|
52
55
|
|
|
53
56
|
Args:
|
|
@@ -68,9 +71,13 @@ def build_circle(ep: ExportedProgram) -> bytes:
|
|
|
68
71
|
for in_spec in ep.graph_signature.input_specs:
|
|
69
72
|
if in_spec.kind != InputKind.USER_INPUT:
|
|
70
73
|
continue
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
+
if isinstance(in_spec.arg, ConstantArgument):
|
|
75
|
+
# ConstantArgument is ignored when option is given
|
|
76
|
+
if config.get("remove_constant_input"):
|
|
77
|
+
continue
|
|
78
|
+
# NoneType ConstantArgument is ignored.
|
|
79
|
+
if in_spec.arg.value == None:
|
|
80
|
+
continue
|
|
74
81
|
arg_name = in_spec.arg.name
|
|
75
82
|
graph.add_input(arg_name)
|
|
76
83
|
logger.debug(f"Registered input: {arg_name}")
|
tico/serialize/operators/op_mm.py
CHANGED
|
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
|
|
|
20
20
|
import torch
|
|
21
21
|
from circle_schema import circle
|
|
22
22
|
|
|
23
|
-
from tico.serialize.circle_graph import CircleSubgraph
|
|
23
|
+
from tico.serialize.circle_graph import CircleSubgraph
|
|
24
24
|
from tico.serialize.operators.hashable_opcode import OpCode
|
|
25
25
|
from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
|
|
26
26
|
from tico.serialize.operators.utils import create_builtin_operator, get_op_index
|
|
@@ -28,9 +28,9 @@ from tico.utils.validate_args_kwargs import MatmulArgs
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
@register_node_visitor
|
|
31
|
-
class MatmulDefaultVisitor(NodeVisitor):
|
|
31
|
+
class MatmulVisitor(NodeVisitor):
|
|
32
32
|
"""
|
|
33
|
-
Convert matmul to
|
|
33
|
+
Convert matmul to Circle BatchMatMul
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
36
|
target: List[torch._ops.OpOverload] = [torch.ops.aten.mm.default]
|
|
@@ -38,131 +38,7 @@ class MatmulDefaultVisitor(NodeVisitor):
|
|
|
38
38
|
def __init__(self, op_codes: Dict[OpCode, int], graph: CircleSubgraph):
|
|
39
39
|
super().__init__(op_codes, graph)
|
|
40
40
|
|
|
41
|
-
|
|
42
|
-
def define_bmm_node(self, inputs, outputs) -> circle.Operator.OperatorT:
|
|
43
|
-
def set_bmm_option(operator):
|
|
44
|
-
operator.builtinOptionsType = (
|
|
45
|
-
circle.BuiltinOptions.BuiltinOptions.BatchMatMulOptions
|
|
46
|
-
)
|
|
47
|
-
option = circle.BatchMatMulOptions.BatchMatMulOptionsT()
|
|
48
|
-
option.adjointLhs, option.adjointRhs = False, False
|
|
49
|
-
option.asymmetricQuantizeInputs = False
|
|
50
|
-
operator.builtinOptions = option
|
|
51
|
-
|
|
52
|
-
op_index = get_op_index(
|
|
53
|
-
circle.BuiltinOperator.BuiltinOperator.BATCH_MATMUL, self._op_codes
|
|
54
|
-
)
|
|
55
|
-
operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
|
|
56
|
-
set_bmm_option(operator)
|
|
57
|
-
|
|
58
|
-
return operator
|
|
59
|
-
|
|
60
|
-
def define_transpose_node(self, inputs, outputs) -> circle.Operator.OperatorT:
|
|
61
|
-
def set_transpose_option(operator):
|
|
62
|
-
operator.builtinOptionsType = (
|
|
63
|
-
circle.BuiltinOptions.BuiltinOptions.TransposeOptions
|
|
64
|
-
)
|
|
65
|
-
option = circle.TransposeOptions.TransposeOptionsT()
|
|
66
|
-
operator.builtinOptions = option
|
|
67
|
-
|
|
68
|
-
transpose_op_index = get_op_index(
|
|
69
|
-
circle.BuiltinOperator.BuiltinOperator.TRANSPOSE, self._op_codes
|
|
70
|
-
)
|
|
71
|
-
operator = create_builtin_operator(
|
|
72
|
-
self.graph, transpose_op_index, inputs, outputs
|
|
73
|
-
)
|
|
74
|
-
set_transpose_option(operator)
|
|
75
|
-
return operator
|
|
76
|
-
|
|
77
|
-
def define_fc_node(self, inputs, outputs) -> circle.Operator.OperatorT:
|
|
78
|
-
def set_fc_option(operator):
|
|
79
|
-
operator.builtinOptionsType = (
|
|
80
|
-
circle.BuiltinOptions.BuiltinOptions.FullyConnectedOptions
|
|
81
|
-
)
|
|
82
|
-
option = circle.FullyConnectedOptions.FullyConnectedOptionsT()
|
|
83
|
-
|
|
84
|
-
option.fusedActivationFunction = (
|
|
85
|
-
circle.ActivationFunctionType.ActivationFunctionType.NONE
|
|
86
|
-
)
|
|
87
|
-
option.weightsFormat = (
|
|
88
|
-
circle.FullyConnectedOptionsWeightsFormat.FullyConnectedOptionsWeightsFormat.DEFAULT
|
|
89
|
-
)
|
|
90
|
-
option.keepNumDims = False
|
|
91
|
-
option.asymmetricQuantizeInputs = False
|
|
92
|
-
option.quantizedBiasType = circle.TensorType.TensorType.FLOAT32
|
|
93
|
-
|
|
94
|
-
operator.builtinOptions = option
|
|
95
|
-
|
|
96
|
-
fc_op_index = get_op_index(
|
|
97
|
-
circle.BuiltinOperator.BuiltinOperator.FULLY_CONNECTED, self._op_codes
|
|
98
|
-
)
|
|
99
|
-
operator = create_builtin_operator(self.graph, fc_op_index, inputs, outputs)
|
|
100
|
-
set_fc_option(operator)
|
|
101
|
-
return operator
|
|
102
|
-
|
|
103
|
-
"""
|
|
104
|
-
Define FullyConnnected with Tranpose operator.
|
|
105
|
-
Note that those sets of operators are equivalent.
|
|
106
|
-
(1) Matmul
|
|
107
|
-
matmul( lhs[H, K], rhs[K, W'] ) -> output(H, W')
|
|
108
|
-
|
|
109
|
-
(2) Transpose + FullyConneccted
|
|
110
|
-
transpose( rhs[K, W'] ) -> trs_output[W', K]
|
|
111
|
-
fullyconnected( lhs[H, K], trs_output[W', K] ) -> output(H, W')
|
|
112
|
-
"""
|
|
113
|
-
|
|
114
|
-
def define_fc_with_transpose(
|
|
115
|
-
self, node, inputs, outputs
|
|
116
|
-
) -> circle.Operator.OperatorT:
|
|
117
|
-
lhs, rhs = inputs
|
|
118
|
-
|
|
119
|
-
# get transpose shape
|
|
120
|
-
rhs_tid: int = self.graph.get_tid_registered(rhs)
|
|
121
|
-
rhs_tensor: circle.Tensor.TensorT = self.graph.tensors[rhs_tid]
|
|
122
|
-
rhs_name: str = rhs.name
|
|
123
|
-
rhs_type: int = rhs_tensor.type
|
|
124
|
-
rhs_shape: List[int] = rhs_tensor.shape
|
|
125
|
-
assert len(rhs_shape) == 2, len(rhs_shape)
|
|
126
|
-
rhs_shape_transpose = [rhs_shape[1], rhs_shape[0]]
|
|
127
|
-
|
|
128
|
-
# create transpose output tensor
|
|
129
|
-
trs_output = self.graph.add_tensor_from_scratch(
|
|
130
|
-
prefix=f"{rhs_name}_transposed_output",
|
|
131
|
-
shape=rhs_shape_transpose,
|
|
132
|
-
shape_signature=None,
|
|
133
|
-
dtype=rhs_type,
|
|
134
|
-
source_node=node,
|
|
135
|
-
)
|
|
136
|
-
trs_perm = self.graph.add_const_tensor(data=[1, 0], source_node=node)
|
|
137
|
-
trs_operator = self.define_transpose_node([rhs, trs_perm], [trs_output])
|
|
138
|
-
self.graph.add_operator(trs_operator)
|
|
139
|
-
|
|
140
|
-
# define fc node
|
|
141
|
-
fc_input = lhs
|
|
142
|
-
fc_weight = trs_output
|
|
143
|
-
fc_shape = [fc_weight.shape[0]]
|
|
144
|
-
fc_bias = self.graph.add_const_tensor(
|
|
145
|
-
data=[0.0] * fc_shape[0], source_node=node
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
operator = self.define_fc_node([fc_input, fc_weight, fc_bias], outputs)
|
|
149
|
-
|
|
150
|
-
return operator
|
|
151
|
-
|
|
152
|
-
def define_node(
|
|
153
|
-
self, node: torch.fx.Node, prior_latency=True
|
|
154
|
-
) -> circle.Operator.OperatorT:
|
|
155
|
-
"""
|
|
156
|
-
NOTE: Possibility of accuracy-latency trade-off
|
|
157
|
-
From ONE compiler's perspective:
|
|
158
|
-
- BMM uses per-tensor quantization for both rhs and lhs.
|
|
159
|
-
- FC uses per-channel quantization for weight and per-tensor for input.
|
|
160
|
-
Thus, FC is better in terms of accuracy.
|
|
161
|
-
FC necessarily involves an additional transpose operation to be identical with mm.
|
|
162
|
-
If transposed operand is const, it can be optimized by constant folding.
|
|
163
|
-
Thus, convert FC only if tranpose can be folded.
|
|
164
|
-
TODO set prior_latency outside
|
|
165
|
-
"""
|
|
41
|
+
def define_node(self, node: torch.fx.Node) -> circle.Operator.OperatorT:
|
|
166
42
|
args = MatmulArgs(*node.args, **node.kwargs) # type: ignore[arg-type]
|
|
167
43
|
input = args.input
|
|
168
44
|
other = args.other
|
|
@@ -170,9 +46,16 @@ class MatmulDefaultVisitor(NodeVisitor):
|
|
|
170
46
|
inputs = [input, other]
|
|
171
47
|
outputs = [node]
|
|
172
48
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
49
|
+
op_index = get_op_index(
|
|
50
|
+
circle.BuiltinOperator.BuiltinOperator.BATCH_MATMUL, self._op_codes
|
|
51
|
+
)
|
|
52
|
+
operator = create_builtin_operator(self.graph, op_index, inputs, outputs)
|
|
53
|
+
operator.builtinOptionsType = (
|
|
54
|
+
circle.BuiltinOptions.BuiltinOptions.BatchMatMulOptions
|
|
55
|
+
)
|
|
56
|
+
option = circle.BatchMatMulOptions.BatchMatMulOptionsT()
|
|
57
|
+
option.adjointLhs, option.adjointRhs = False, False
|
|
58
|
+
option.asymmetricQuantizeInputs = False
|
|
59
|
+
operator.builtinOptions = option
|
|
177
60
|
|
|
178
61
|
return operator
|
tico/utils/convert.py
CHANGED
|
@@ -40,6 +40,7 @@ from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
|
|
|
40
40
|
from tico.passes.const_prop_pass import ConstPropPass
|
|
41
41
|
from tico.passes.convert_conv1d_to_conv2d import ConvertConv1dToConv2d
|
|
42
42
|
from tico.passes.convert_layout_op_to_reshape import ConvertLayoutOpToReshape
|
|
43
|
+
from tico.passes.convert_matmul_to_linear import ConvertMatmulToLinear
|
|
43
44
|
from tico.passes.convert_repeat_to_expand_copy import ConvertRepeatToExpandCopy
|
|
44
45
|
from tico.passes.convert_to_relu6 import ConvertToReLU6
|
|
45
46
|
from tico.passes.decompose_addmm import DecomposeAddmm
|
|
@@ -249,6 +250,10 @@ def convert_exported_module_to_circle(
|
|
|
249
250
|
ConstPropPass(),
|
|
250
251
|
SegmentIndexSelectConst(),
|
|
251
252
|
LegalizeCausalMaskValue(enabled=config.get("legalize_causal_mask_value")),
|
|
253
|
+
ConvertMatmulToLinear(
|
|
254
|
+
enable_lhs_const=config.get("convert_lhs_const_mm_to_fc"),
|
|
255
|
+
enable_rhs_const=config.get("convert_rhs_const_mm_to_fc"),
|
|
256
|
+
),
|
|
252
257
|
LowerToResizeNearestNeighbor(),
|
|
253
258
|
LegalizePreDefinedLayoutOperators(),
|
|
254
259
|
LowerPow2ToMul(),
|
|
@@ -287,7 +292,7 @@ def convert_exported_module_to_circle(
|
|
|
287
292
|
|
|
288
293
|
check_unsupported_target(exported_program)
|
|
289
294
|
check_training_ops(exported_program)
|
|
290
|
-
circle_program = build_circle(exported_program)
|
|
295
|
+
circle_program = build_circle(exported_program, config)
|
|
291
296
|
|
|
292
297
|
return circle_program
|
|
293
298
|
|
{tico-0.1.0.dev250917.dist-info → tico-0.1.0.dev250921.dist-info}/RECORD
CHANGED
|
@@ -1,19 +1,18 @@
|
|
|
1
|
-
tico/__init__.py,sha256=
|
|
1
|
+
tico/__init__.py,sha256=SJrnDNsVJlIf-r1ZVzi2Kj_xI68YVwAjm83FWgbiWLE,1883
|
|
2
2
|
tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
|
|
3
3
|
tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
|
|
4
4
|
tico/config/base.py,sha256=q5xMqGxTUZs4mFqt5c7i_y9U00fYgdMGl9nUqIVMlCo,1248
|
|
5
5
|
tico/config/factory.py,sha256=il0zqB6Lm5NX2LnG-TUhmiP9vVeZ_3TucJMorVZIodY,1324
|
|
6
|
-
tico/config/v1.py,sha256=
|
|
6
|
+
tico/config/v1.py,sha256=AVgOck-HxR1R1FZPVjtN5J82hPLJvUxwzbnyWXIQZWE,1237
|
|
7
7
|
tico/experimental/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
8
8
|
tico/experimental/quantization/__init__.py,sha256=IaJPZegVJp0P3luutBo907Kp5sOJensE1Mm-XBG_jBs,122
|
|
9
|
-
tico/experimental/quantization/
|
|
10
|
-
tico/experimental/quantization/
|
|
11
|
-
tico/experimental/quantization/quantizer.py,sha256=_2pDtWFKDCuKfYF2bptOwIYsa0VFNFM1ZNgi8_OGvHM,2365
|
|
9
|
+
tico/experimental/quantization/public_interface.py,sha256=y-iwaeuedBvHwTh5hflQg4u2ZCdqf46IlTl9ntHq8pU,4425
|
|
10
|
+
tico/experimental/quantization/quantizer.py,sha256=pDTQGzR-BcQJeGZ7O4cXRQdCme4q_POpxHetwnv0bYg,2370
|
|
12
11
|
tico/experimental/quantization/algorithm/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
13
12
|
tico/experimental/quantization/algorithm/gptq/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
14
13
|
tico/experimental/quantization/algorithm/gptq/gptq.py,sha256=Qn9b_2ki7B64DcVEY25NMkww3PdZ5EqYQQXfYhNDQ6I,5555
|
|
15
14
|
tico/experimental/quantization/algorithm/gptq/quant.py,sha256=Rl4wAOCmlE0U09BtNCDbccaSNohRHCNLwFi3zCqZfNo,5127
|
|
16
|
-
tico/experimental/quantization/algorithm/gptq/quantizer.py,sha256=
|
|
15
|
+
tico/experimental/quantization/algorithm/gptq/quantizer.py,sha256=ZKeQQWm6eMUyRgntQxVR-QVjxJOc2pW4Dc_mrEPZA64,11686
|
|
17
16
|
tico/experimental/quantization/algorithm/gptq/utils.py,sha256=leGKayf-xbSjVwwAGTA5RsxUKrhDiklOQdlsLifjdrs,1811
|
|
18
17
|
tico/experimental/quantization/algorithm/pt2e/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
19
18
|
tico/experimental/quantization/algorithm/pt2e/quantizer.py,sha256=mdTvsG87bo8fu0GaWqSM8iBCs-4f4EfUlVtk-Ko6M34,2546
|
|
@@ -38,8 +37,13 @@ tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py,sha256=
|
|
|
38
37
|
tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py,sha256=Idtoya2RcGKlgUJgC9WqNz0jH3gf6ViuPmsD9ySHbls,2253
|
|
39
38
|
tico/experimental/quantization/algorithm/smoothquant/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
40
39
|
tico/experimental/quantization/algorithm/smoothquant/observer.py,sha256=OWBKQ3ox6PqeqgevxOjpXvb7uApoqE4YbUBelGhVSN8,3435
|
|
41
|
-
tico/experimental/quantization/algorithm/smoothquant/quantizer.py,sha256=
|
|
40
|
+
tico/experimental/quantization/algorithm/smoothquant/quantizer.py,sha256=14-QrKAW-Rw6pIbbNaD5eORcH2fqi40-TNFGaWVakIg,3649
|
|
42
41
|
tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py,sha256=fxCy4m-BsSjraciSVPFlPhgsOT46RjrOgczQGb7B9TA,11561
|
|
42
|
+
tico/experimental/quantization/config/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
43
|
+
tico/experimental/quantization/config/base.py,sha256=xg_HCDSuMgYvMd6ENZe4Sm2SYJgMaCBj4cmqaz_lhAs,816
|
|
44
|
+
tico/experimental/quantization/config/gptq.py,sha256=IUIEz5bLhsTXqoBCE1rfPec99zsRjwgpDbPW5YJqOPg,973
|
|
45
|
+
tico/experimental/quantization/config/pt2e.py,sha256=9HCrraTGGZeKEN9puKV-ODi7ncV2Wjc3oe_JCO1D_Rs,850
|
|
46
|
+
tico/experimental/quantization/config/smoothquant.py,sha256=fcyhu3YlOTM7fDW9lGTXh-uJOUD6CeykZj7AMCNVbak,1415
|
|
43
47
|
tico/experimental/quantization/evaluation/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
44
48
|
tico/experimental/quantization/evaluation/backend.py,sha256=CZL9rZOA0t8cH7PHp6u9l7dGqWNvTj9bKOvwo0PVul0,692
|
|
45
49
|
tico/experimental/quantization/evaluation/evaluate.py,sha256=kfa_GvFaX6DoSTAmuCImMJqF2jgqtnor5UpC7wVmGPI,7877
|
|
@@ -68,7 +72,7 @@ tico/experimental/quantization/ptq/examples/quantize_linear.py,sha256=8zq-ZJDYga
|
|
|
68
72
|
tico/experimental/quantization/ptq/examples/quantize_llama_attn.py,sha256=cVWUSSzaZWFp5QZkNkrlpHU3kXyP84QtnZbahVml_yQ,4329
|
|
69
73
|
tico/experimental/quantization/ptq/examples/quantize_llama_decoder_layer.py,sha256=mBWrjkyEovYQsPC4Rrsri6Pm1rlFmDb3NiP0DQQhFyM,5751
|
|
70
74
|
tico/experimental/quantization/ptq/examples/quantize_llama_mlp.py,sha256=N1qZQgt1S-xZrdv-PW7OfXEcv0gsO2q9faOF4aD-zKo,4147
|
|
71
|
-
tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=
|
|
75
|
+
tico/experimental/quantization/ptq/examples/quantize_with_gptq.py,sha256=y-SK56j4wL-9j-0jtuOqQUq4CElZtGOETp-Tg4XivUI,10438
|
|
72
76
|
tico/experimental/quantization/ptq/observers/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
73
77
|
tico/experimental/quantization/ptq/observers/affine_base.py,sha256=e2Eba64nrxKQyE4F_WJ7WTSsk3xe6bkdGUKaoLFWGFw,4638
|
|
74
78
|
tico/experimental/quantization/ptq/observers/base.py,sha256=Wons1MzpqK1mfcy-ppl-B2Dum0edXg2dWW2Lw3V18tw,3280
|
|
@@ -84,8 +88,9 @@ tico/experimental/quantization/ptq/wrappers/__init__.py,sha256=IO6FP_xYbGy0dW0HL
|
|
|
84
88
|
tico/experimental/quantization/ptq/wrappers/ptq_wrapper.py,sha256=F9sK_DiRaXiGNHULcwIbs5EUtHz6ZJ7N4r5CWTTfhsM,2442
|
|
85
89
|
tico/experimental/quantization/ptq/wrappers/quant_elementwise.py,sha256=LhEoobfvto6zKrBOKL4gmxfFFc31jHzyQV_zfps-iQM,3604
|
|
86
90
|
tico/experimental/quantization/ptq/wrappers/quant_module_base.py,sha256=vkcDos_knGSS29rIZuEIWkAJLHrENbGz8nCH2-iara8,5969
|
|
87
|
-
tico/experimental/quantization/ptq/wrappers/registry.py,sha256=
|
|
91
|
+
tico/experimental/quantization/ptq/wrappers/registry.py,sha256=OVO5nev6J8Br9zsIX-Ut7ZgWzA9f_jk0Np9bGioXgQM,5171
|
|
88
92
|
tico/experimental/quantization/ptq/wrappers/fairseq/__init__.py,sha256=Mc8FLd9DusyB_IT1vk1OYrRkngOYnYd05IvtA9ORVQc,160
|
|
93
|
+
tico/experimental/quantization/ptq/wrappers/fairseq/quant_decoder_layer.py,sha256=JT79shxOhDtRFgm8jrrN6HKvyVotiytLjMjAxX-Cztg,20416
|
|
89
94
|
tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder.py,sha256=r9DPUAbL2KRJ8zpMJ39Y9n6Oe79nte-mFcdjG2qEP-w,13809
|
|
90
95
|
tico/experimental/quantization/ptq/wrappers/fairseq/quant_encoder_layer.py,sha256=aGr80Ku75j2H-UZ0elEa0mOQEyaAs2YJ4WJCN0lonn0,6412
|
|
91
96
|
tico/experimental/quantization/ptq/wrappers/fairseq/quant_mha.py,sha256=HsigmOLeacLXc46QNeFqwQ0DwKQhNrtWTKEtLJoqXoc,15562
|
|
@@ -107,8 +112,9 @@ tico/passes/cast_mixed_type_args.py,sha256=Wd3sCDKJZwdb8GiMWKljm8X5CLFRd8eCz-dmW
|
|
|
107
112
|
tico/passes/const_prop_pass.py,sha256=hDxGgJNiRjsgOArdaoeAOcOOA-nKBvA1W1zcMZQA5yg,11531
|
|
108
113
|
tico/passes/convert_conv1d_to_conv2d.py,sha256=ktS3h158y9rg1sQiW8BZZbflV_dk_UdjBPQnuiOKyzg,5303
|
|
109
114
|
tico/passes/convert_layout_op_to_reshape.py,sha256=sCAFjkmVtiKjvDQSAgnjNBHl3_hWXJZElGDXQiTH-7s,2963
|
|
115
|
+
tico/passes/convert_matmul_to_linear.py,sha256=Y_Me8YqrNumfMrB08WT4wwAoKIfKNak5y8Y10ekWe5s,6611
|
|
110
116
|
tico/passes/convert_repeat_to_expand_copy.py,sha256=JbtFTmWyfJS2SSd_higP1IEhQeh7wHdN5dmTbbiFVCs,3237
|
|
111
|
-
tico/passes/convert_to_relu6.py,sha256=
|
|
117
|
+
tico/passes/convert_to_relu6.py,sha256=9B6OLyF72tMvD-ugV7aBx6l1szwERufNBUaX34pkZ4c,6445
|
|
112
118
|
tico/passes/decompose_addmm.py,sha256=KjnpZjSuA0uvNmKaTN_EMwobcOi3CAB81buORzTDxro,3979
|
|
113
119
|
tico/passes/decompose_batch_norm.py,sha256=06LAxhSmpTxFZJmUelwB3I_GipNWrLoM7PfM6ZkxOZY,6512
|
|
114
120
|
tico/passes/decompose_fake_quantize.py,sha256=736srs8SM8K_mLR0WG10LVMMLRkYkBM9OF0k1GCkAW0,5218
|
|
@@ -139,7 +145,7 @@ tico/passes/segment_index_select.py,sha256=VVCKNLtYRkr9n5lGnlzEuQsQ0WVxEYXGchFrD
|
|
|
139
145
|
tico/serialize/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
140
146
|
tico/serialize/circle_graph.py,sha256=qvyul_HULoz7B_6RFKQ8s9RjEvMgPq-ynMVkZe8aqE4,12034
|
|
141
147
|
tico/serialize/circle_mapping.py,sha256=c__AIHPi23lPugNJFolgMAKrw8j7gEeMaUQ1LAMSFnY,8542
|
|
142
|
-
tico/serialize/circle_serializer.py,sha256=
|
|
148
|
+
tico/serialize/circle_serializer.py,sha256=tw2xwm8tRjaFzZdaaS8Fa8Jfqz0r7Gn8L6D66m0QA0g,11228
|
|
143
149
|
tico/serialize/pack.py,sha256=5HZ9kX3x6C6CyT_FWS6FRmvx_P7Dx21orjUNQxJ2xlo,1297
|
|
144
150
|
tico/serialize/quant_param.py,sha256=6nbGKdqwMI9Cx9BLXJ9A9JU4qb770S8vTM1vCZRX3Eo,1342
|
|
145
151
|
tico/serialize/operators/__init__.py,sha256=LIvXsNnN4yUCS2CGNQ5XW8p8oXDTV_WHWuOEAw1t6WY,990
|
|
@@ -190,7 +196,7 @@ tico/serialize/operators/op_max_pool2d_with_indices.py,sha256=i4iKZ262ytDKUt7bG9
|
|
|
190
196
|
tico/serialize/operators/op_maximum.py,sha256=JjBr6gWEnuakLuk1_feotTHfIIm3s5YqWmqhUMpSPI0,1873
|
|
191
197
|
tico/serialize/operators/op_mean.py,sha256=rVQZOxCJkHFY4kQBAS1HVK0HkcqxgkSy6zvEDLX_WYQ,2267
|
|
192
198
|
tico/serialize/operators/op_minimum.py,sha256=fASjQVcTPCin02umQwFPdq2ss-Ve7S5A33J3QmmQ_wQ,1873
|
|
193
|
-
tico/serialize/operators/op_mm.py,sha256=
|
|
199
|
+
tico/serialize/operators/op_mm.py,sha256=VJJRLLYn9zAMcR2rsb86o809edyRJ7CW31waAL0ZXeI,2244
|
|
194
200
|
tico/serialize/operators/op_mul.py,sha256=si_VdYNyFbULb50SnXHOINh0dZQ2PhRB6Fzl54ZBj5Y,3049
|
|
195
201
|
tico/serialize/operators/op_ne.py,sha256=xa2WJL2tYksxw7fIJic_D9ltLEseyCII8HpR32Oq8Do,1900
|
|
196
202
|
tico/serialize/operators/op_neg.py,sha256=fkI3ExyD3QF-qtxBcXqQutPNDbNL8g7lZYE7CyD2wLk,2046
|
|
@@ -228,7 +234,7 @@ tico/serialize/operators/utils.py,sha256=lXGpEJW1h8U_-gfc6EWjvvSiq3yJ9P-v1v3EMRT
|
|
|
228
234
|
tico/serialize/operators/adapters/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
229
235
|
tico/serialize/operators/adapters/llama_rmsnorm.py,sha256=6t3dhfNpR03eIjsmhymF2JKd6lCf7PvInqMf77c_BOE,1139
|
|
230
236
|
tico/utils/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
231
|
-
tico/utils/convert.py,sha256=
|
|
237
|
+
tico/utils/convert.py,sha256=bgk-a_gdRrrcDFFQHS_ElPdzORmfAZAgNendfzEpHOk,13501
|
|
232
238
|
tico/utils/define.py,sha256=Ypgp7YffM4pgPl4Zh6TmogSn1OxGBMRw_e09qYGflZk,1467
|
|
233
239
|
tico/utils/diff_graph.py,sha256=_eDGGPDPYQD4b--MXX0DLoVgSt_wLfNPt47UlolLLR4,5272
|
|
234
240
|
tico/utils/dtype.py,sha256=L5Qb7qgbt0eQ5frUTvHYrRtTJb1dg4-JNEopcxCNg1U,1389
|
|
@@ -252,9 +258,9 @@ tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
|
|
|
252
258
|
tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
|
|
253
259
|
tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
|
|
254
260
|
tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
|
|
255
|
-
tico-0.1.0.
|
|
256
|
-
tico-0.1.0.
|
|
257
|
-
tico-0.1.0.
|
|
258
|
-
tico-0.1.0.
|
|
259
|
-
tico-0.1.0.
|
|
260
|
-
tico-0.1.0.
|
|
261
|
+
tico-0.1.0.dev250921.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
|
|
262
|
+
tico-0.1.0.dev250921.dist-info/METADATA,sha256=PKokhTsAtNxesEROg_vhfa6pIcl8WyFzlx-5H7RBcGk,8450
|
|
263
|
+
tico-0.1.0.dev250921.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
264
|
+
tico-0.1.0.dev250921.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
|
|
265
|
+
tico-0.1.0.dev250921.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
|
|
266
|
+
tico-0.1.0.dev250921.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|