tico 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. tico/__init__.py +42 -0
  2. tico/config/__init__.py +4 -0
  3. tico/config/base.py +37 -0
  4. tico/config/factory.py +41 -0
  5. tico/config/v1.py +35 -0
  6. tico/experimental/__init__.py +1 -0
  7. tico/experimental/quantization/__init__.py +1 -0
  8. tico/experimental/quantization/algorithm/__init__.py +1 -0
  9. tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
  10. tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
  11. tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
  12. tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
  13. tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
  14. tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
  15. tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
  16. tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
  17. tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
  18. tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
  19. tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
  20. tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
  21. tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
  22. tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
  23. tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
  24. tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
  25. tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
  26. tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
  27. tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
  28. tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
  29. tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
  30. tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
  31. tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
  32. tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
  33. tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
  34. tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
  35. tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
  36. tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
  37. tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
  38. tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
  39. tico/experimental/quantization/config.py +68 -0
  40. tico/experimental/quantization/evaluation/__init__.py +1 -0
  41. tico/experimental/quantization/evaluation/backend.py +20 -0
  42. tico/experimental/quantization/evaluation/evaluate.py +223 -0
  43. tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
  44. tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
  45. tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
  46. tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
  47. tico/experimental/quantization/evaluation/metric.py +109 -0
  48. tico/experimental/quantization/evaluation/utils.py +185 -0
  49. tico/experimental/quantization/passes/__init__.py +1 -0
  50. tico/experimental/quantization/passes/fold_quant_ops.py +154 -0
  51. tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +345 -0
  52. tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
  53. tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
  54. tico/experimental/quantization/passes/quantize_bias.py +123 -0
  55. tico/experimental/quantization/passes/remove_weight_dequant_op.py +177 -0
  56. tico/experimental/quantization/public_interface.py +108 -0
  57. tico/experimental/quantization/quantizer.py +71 -0
  58. tico/interpreter/__init__.py +1 -0
  59. tico/interpreter/infer.py +116 -0
  60. tico/interpreter/interpreter.py +93 -0
  61. tico/passes/__init__.py +1 -0
  62. tico/passes/cast_aten_where_arg_type.py +191 -0
  63. tico/passes/cast_mixed_type_args.py +187 -0
  64. tico/passes/const_prop_pass.py +307 -0
  65. tico/passes/convert_conv1d_to_conv2d.py +160 -0
  66. tico/passes/convert_layout_op_to_reshape.py +85 -0
  67. tico/passes/convert_repeat_to_expand_copy.py +89 -0
  68. tico/passes/convert_to_relu6.py +181 -0
  69. tico/passes/decompose_addmm.py +124 -0
  70. tico/passes/decompose_batch_norm.py +192 -0
  71. tico/passes/decompose_fake_quantize.py +134 -0
  72. tico/passes/decompose_fake_quantize_tensor_qparams.py +294 -0
  73. tico/passes/decompose_group_norm.py +275 -0
  74. tico/passes/decompose_grouped_conv2d.py +209 -0
  75. tico/passes/decompose_slice_scatter.py +169 -0
  76. tico/passes/extract_dtype_kwargs.py +122 -0
  77. tico/passes/fill_meta_val.py +57 -0
  78. tico/passes/fuse_leading_unsqueeze_reshape.py +112 -0
  79. tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
  80. tico/passes/legalize_causal_mask_value.py +108 -0
  81. tico/passes/legalize_predefined_layout_operators.py +386 -0
  82. tico/passes/lower_pow2_to_mul.py +75 -0
  83. tico/passes/lower_to_resize_nearest_neighbor.py +235 -0
  84. tico/passes/lower_to_slice.py +230 -0
  85. tico/passes/merge_consecutive_cat.py +80 -0
  86. tico/passes/ops.py +78 -0
  87. tico/passes/remove_nop.py +84 -0
  88. tico/passes/remove_redundant_assert_nodes.py +51 -0
  89. tico/passes/remove_redundant_expand.py +66 -0
  90. tico/passes/remove_redundant_permute.py +122 -0
  91. tico/passes/remove_redundant_reshape.py +436 -0
  92. tico/passes/remove_redundant_slice.py +62 -0
  93. tico/passes/remove_redundant_to_copy.py +86 -0
  94. tico/passes/restore_linear.py +115 -0
  95. tico/passes/segment_index_select.py +145 -0
  96. tico/pt2_to_circle.py +105 -0
  97. tico/serialize/__init__.py +1 -0
  98. tico/serialize/circle_graph.py +319 -0
  99. tico/serialize/circle_mapping.py +177 -0
  100. tico/serialize/circle_serializer.py +240 -0
  101. tico/serialize/operators/__init__.py +28 -0
  102. tico/serialize/operators/hashable_opcode.py +43 -0
  103. tico/serialize/operators/node_visitor.py +80 -0
  104. tico/serialize/operators/op_abs.py +53 -0
  105. tico/serialize/operators/op_add.py +69 -0
  106. tico/serialize/operators/op_alias_copy.py +64 -0
  107. tico/serialize/operators/op_any.py +150 -0
  108. tico/serialize/operators/op_arange_start_step.py +61 -0
  109. tico/serialize/operators/op_argmax.py +62 -0
  110. tico/serialize/operators/op_avg_pool2d.py +192 -0
  111. tico/serialize/operators/op_bmm.py +62 -0
  112. tico/serialize/operators/op_cat.py +66 -0
  113. tico/serialize/operators/op_clamp.py +126 -0
  114. tico/serialize/operators/op_clone.py +71 -0
  115. tico/serialize/operators/op_constant_pad_nd.py +72 -0
  116. tico/serialize/operators/op_conv2d.py +186 -0
  117. tico/serialize/operators/op_copy.py +164 -0
  118. tico/serialize/operators/op_cos.py +59 -0
  119. tico/serialize/operators/op_cumsum.py +95 -0
  120. tico/serialize/operators/op_depthwise_conv2d.py +199 -0
  121. tico/serialize/operators/op_dequantize_per_channel.py +82 -0
  122. tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
  123. tico/serialize/operators/op_div.py +62 -0
  124. tico/serialize/operators/op_embedding.py +60 -0
  125. tico/serialize/operators/op_eq.py +64 -0
  126. tico/serialize/operators/op_exp.py +60 -0
  127. tico/serialize/operators/op_expand.py +91 -0
  128. tico/serialize/operators/op_full.py +48 -0
  129. tico/serialize/operators/op_full_like.py +55 -0
  130. tico/serialize/operators/op_ge.py +54 -0
  131. tico/serialize/operators/op_gelu.py +59 -0
  132. tico/serialize/operators/op_gt.py +54 -0
  133. tico/serialize/operators/op_index.py +82 -0
  134. tico/serialize/operators/op_index_select.py +64 -0
  135. tico/serialize/operators/op_instance_norm.py +91 -0
  136. tico/serialize/operators/op_leaky_relu.py +60 -0
  137. tico/serialize/operators/op_linear.py +70 -0
  138. tico/serialize/operators/op_log.py +53 -0
  139. tico/serialize/operators/op_log1p.py +86 -0
  140. tico/serialize/operators/op_logical_and.py +63 -0
  141. tico/serialize/operators/op_logical_not.py +62 -0
  142. tico/serialize/operators/op_lt.py +61 -0
  143. tico/serialize/operators/op_max_dim.py +70 -0
  144. tico/serialize/operators/op_max_pool2d_with_indices.py +155 -0
  145. tico/serialize/operators/op_maximum.py +53 -0
  146. tico/serialize/operators/op_mean.py +66 -0
  147. tico/serialize/operators/op_minimum.py +53 -0
  148. tico/serialize/operators/op_mm.py +177 -0
  149. tico/serialize/operators/op_mul.py +99 -0
  150. tico/serialize/operators/op_ne.py +54 -0
  151. tico/serialize/operators/op_neg.py +59 -0
  152. tico/serialize/operators/op_permute.py +65 -0
  153. tico/serialize/operators/op_pow.py +141 -0
  154. tico/serialize/operators/op_prelu.py +54 -0
  155. tico/serialize/operators/op_quantize_per_tensor.py +79 -0
  156. tico/serialize/operators/op_reciprocal.py +64 -0
  157. tico/serialize/operators/op_relu.py +53 -0
  158. tico/serialize/operators/op_relu6.py +52 -0
  159. tico/serialize/operators/op_repeat.py +100 -0
  160. tico/serialize/operators/op_reshape.py +73 -0
  161. tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
  162. tico/serialize/operators/op_rsqrt.py +53 -0
  163. tico/serialize/operators/op_scalar_tensor.py +51 -0
  164. tico/serialize/operators/op_select_copy.py +65 -0
  165. tico/serialize/operators/op_sigmoid.py +56 -0
  166. tico/serialize/operators/op_sin.py +53 -0
  167. tico/serialize/operators/op_slice.py +155 -0
  168. tico/serialize/operators/op_softmax.py +100 -0
  169. tico/serialize/operators/op_split_with_sizes.py +99 -0
  170. tico/serialize/operators/op_sqrt.py +55 -0
  171. tico/serialize/operators/op_squeeze.py +73 -0
  172. tico/serialize/operators/op_sub.py +71 -0
  173. tico/serialize/operators/op_sum.py +63 -0
  174. tico/serialize/operators/op_tanh.py +54 -0
  175. tico/serialize/operators/op_to_copy.py +105 -0
  176. tico/serialize/operators/op_unsqueeze.py +66 -0
  177. tico/serialize/operators/op_view.py +74 -0
  178. tico/serialize/operators/op_where.py +82 -0
  179. tico/serialize/operators/utils.py +94 -0
  180. tico/serialize/pack.py +35 -0
  181. tico/serialize/quant_param.py +42 -0
  182. tico/utils/__init__.py +1 -0
  183. tico/utils/convert.py +296 -0
  184. tico/utils/define.py +35 -0
  185. tico/utils/diff_graph.py +181 -0
  186. tico/utils/errors.py +35 -0
  187. tico/utils/graph.py +282 -0
  188. tico/utils/logging.py +45 -0
  189. tico/utils/model.py +37 -0
  190. tico/utils/mx/__init__.py +1 -0
  191. tico/utils/mx/elemwise_ops.py +267 -0
  192. tico/utils/mx/formats.py +125 -0
  193. tico/utils/mx/mx_ops.py +270 -0
  194. tico/utils/padding.py +47 -0
  195. tico/utils/passes.py +76 -0
  196. tico/utils/register_custom_op.py +609 -0
  197. tico/utils/serialize.py +42 -0
  198. tico/utils/trace_decorators.py +101 -0
  199. tico/utils/utils.py +406 -0
  200. tico/utils/validate_args_kwargs.py +1149 -0
  201. tico-0.1.0.dist-info/LICENSE +241 -0
  202. tico-0.1.0.dist-info/METADATA +354 -0
  203. tico-0.1.0.dist-info/RECORD +206 -0
  204. tico-0.1.0.dist-info/WHEEL +5 -0
  205. tico-0.1.0.dist-info/entry_points.txt +3 -0
  206. tico-0.1.0.dist-info/top_level.txt +1 -0
tico/utils/register_custom_op.py
@@ -0,0 +1,609 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import List, Optional
+
+ import torch
+ from torch._subclasses.fake_tensor import FakeTensor
+ from torch.library import custom_op, register_fake
+
+ from tico.utils.mx.mx_ops import _quantize_mx
+
+ # Note that this operator assumes the input tensor is in NHWC format.
+ def CircleResizeNearestNeighbor():
+     @custom_op("circle_custom::resize_nearest_neighbor", mutates_args=())
+     def resize_nearest_neighbor(input_: torch.Tensor, size: List[int]) -> torch.Tensor:
+         input_size = input_.size()
+         H = input_size[1]
+         W = input_size[2]
+         H_scale_factor = size[1] / H
+         W_scale_factor = size[2] / W
+         if H_scale_factor != W_scale_factor:
+             raise RuntimeError("Scale factors of H and W must be the same.")
+         return torch.nn.functional.interpolate(
+             input_, scale_factor=H_scale_factor, mode="nearest"
+         )
+
+     @register_fake("circle_custom::resize_nearest_neighbor")
+     def _(input_: torch.Tensor, size: List[int]):
+         shape = list(input_.size())
+         new_shape = [shape[0]] + list(size) + [shape[3]]
+         result = torch.empty(new_shape, dtype=input_.dtype)
+         return result
+
+
+ def CircleConv2d():
+     """
+     Note that this op follows the input spec of `aten.conv2d.default`, whose
+     number of arguments satisfies (2 <= len(node.args) <= 7).
+
+     [RESTRICTION]
+     Ideally the spec would be defined as conv2d(input, weight, *args), but
+     custom operators in torch do not support positional-only args, so the
+     optional arguments default to None instead.
+     """
+
+     @custom_op("circle_custom::conv2d", mutates_args=())
+     def conv2d(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ) -> torch.Tensor:
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         groups = 1 if groups is None else groups
+
+         if groups != 1:
+             raise RuntimeError(
+                 f"CircleConv2d only supports groups=1. The node's groups: {groups}"
+             )
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHWI_to_OIHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         OIHW_weight = torch.ops.aten.permute.default(weight, OHWI_to_OIHW)
+
+         args = [NCHW_input, OIHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::conv2d")
+     def _(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ):
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         groups = 1 if groups is None else groups
+         if groups != 1:
+             raise RuntimeError(
+                 f"CircleConv2d only supports groups=1. The node's groups: {groups}"
+             )
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHWI_to_OIHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         OIHW_weight = torch.ops.aten.permute.default(weight, OHWI_to_OIHW)
+
+         args = [NCHW_input, OIHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleConv2dPadding():
+     """
+     Almost the same as `CircleConv2d`, except that the padding argument is a string.
+
+     Q) Why create another custom op rather than have `CircleConv2d` cover multiple padding types?
+     A) A `padding` parameter of type Optional[Union[List[int], str]] is not allowed in torch.
+     """
+
+     @custom_op("circle_custom::conv2d.padding", mutates_args=())
+     def conv2d_padding(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[str] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ) -> torch.Tensor:
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = "valid" if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         groups = 1 if groups is None else groups
+         if groups != 1:
+             raise RuntimeError(
+                 f"CircleConv2dPadding only supports groups=1. The node's groups: {groups}"
+             )
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHWI_to_OIHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         OIHW_weight = torch.ops.aten.permute.default(weight, OHWI_to_OIHW)
+
+         args = [NCHW_input, OIHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.padding(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::conv2d.padding")
+     def _(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[str] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ):
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = "valid" if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         groups = 1 if groups is None else groups
+         if groups != 1:
+             raise RuntimeError(
+                 f"CircleConv2dPadding only supports groups=1. The node's groups: {groups}"
+             )
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHWI_to_OIHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         OIHW_weight = torch.ops.aten.permute.default(weight, OHWI_to_OIHW)
+
+         args = [NCHW_input, OIHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.padding(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleDepthwiseConv2d():
+     """
+     Note that this op follows the input spec of `aten.conv2d.default`, whose
+     number of arguments satisfies (2 <= len(node.args) <= 7).
+
+     [RESTRICTION]
+     Ideally the spec would be defined as conv2d(input, weight, *args), but
+     custom operators in torch do not support positional-only args, so the
+     optional arguments default to None instead.
+     """
+
+     @custom_op("circle_custom::depthwise_conv2d", mutates_args=())
+     def depthwise_conv2d(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ) -> torch.Tensor:
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+
+         # Depthwise convolution is expressed as a grouped conv, so groups must be > 1.
+         assert groups and groups > 1
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHW1_to_1OHW = [3, 0, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         _1OHW_weight = torch.ops.aten.permute.default(weight, OHW1_to_1OHW)
+
+         args = [NCHW_input, _1OHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::depthwise_conv2d")
+     def _(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ):
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+
+         assert groups and groups > 1
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHW1_to_1OHW = [3, 0, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         _1OHW_weight = torch.ops.aten.permute.default(weight, OHW1_to_1OHW)
+
+         args = [NCHW_input, _1OHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleDepthwiseConv2dPadding():
+     """
+     Depthwise counterpart of `CircleConv2dPadding`: same as
+     `CircleDepthwiseConv2d`, except that the padding argument is a string.
+     """
+
+     @custom_op("circle_custom::depthwise_conv2d.padding", mutates_args=())
+     def depthwise_conv2d_padding(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[str] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ) -> torch.Tensor:
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = "valid" if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+
+         assert groups and groups > 1
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHW1_to_1OHW = [3, 0, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         _1OHW_weight = torch.ops.aten.permute.default(weight, OHW1_to_1OHW)
+
+         args = [NCHW_input, _1OHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.padding(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::depthwise_conv2d.padding")
+     def _(
+         input_: torch.Tensor,
+         weight: torch.Tensor,
+         bias: Optional[torch.Tensor] = None,
+         stride: Optional[List[int]] = None,
+         padding: Optional[str] = None,
+         dilation: Optional[List[int]] = None,
+         groups: Optional[int] = None,
+     ):
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = [1, 1] if stride is None else stride
+         padding = "valid" if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+
+         assert groups and groups > 1
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         OHW1_to_1OHW = [3, 0, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+         _1OHW_weight = torch.ops.aten.permute.default(weight, OHW1_to_1OHW)
+
+         args = [NCHW_input, _1OHW_weight, bias, stride, padding, dilation, groups]
+         NCHW_output = torch.ops.aten.conv2d.padding(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleMaxPool2D():
+     """
+     Note that this op follows the input spec of `aten.max_pool2d_with_indices.default`,
+     whose number of arguments satisfies (3 <= len(node.args) <= 6).
+
+     [RESTRICTION]
+     Custom operators in torch do not support positional-only args, so the
+     optional arguments default to None instead.
+     """
+
+     @custom_op("circle_custom::maxpool2d", mutates_args=())
+     def maxpool2d(
+         input_: torch.Tensor,
+         kernel_size: List[int],
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         ceil_mode: Optional[bool] = None,
+     ) -> torch.Tensor:
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = kernel_size if not stride else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         ceil_mode = False if ceil_mode is None else ceil_mode
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+
+         args = [NCHW_input, kernel_size, stride, padding, dilation, ceil_mode]
+         NCHW_output = torch.ops.aten.max_pool2d_with_indices.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         # use the first output (values); the second output is indices
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output[0], NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::maxpool2d")
+     def _(
+         input_: torch.Tensor,
+         kernel_size: List[int],
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         dilation: Optional[List[int]] = None,
+         ceil_mode: Optional[bool] = None,
+     ):
+         """
+         Set default values.
+
+         Custom operators support only a limited set of types for default values,
+         so the arguments are declared as None in the input spec and then replaced
+         with their actual defaults here.
+         https://github.com/pytorch/pytorch/blob/6b05aafc/torch/_library/infer_schema.py#L131-L144
+         """
+         stride = kernel_size if not stride else stride
+         padding = [0, 0] if padding is None else padding
+         dilation = [1, 1] if dilation is None else dilation
+         ceil_mode = False if ceil_mode is None else ceil_mode
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+
+         args = [NCHW_input, kernel_size, stride, padding, dilation, ceil_mode]
+         NCHW_output = torch.ops.aten.max_pool2d_with_indices.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         # use the first output (values); the second output is indices
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output[0], NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleAvgPool2D():
+     @custom_op("circle_custom::avgpool2d", mutates_args=())
+     def avgpool2d(
+         input_: torch.Tensor,
+         kernel_size: List[int],
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         ceil_mode: Optional[bool] = None,
+         count_include_pad: Optional[bool] = None,
+         divisor_override: Optional[int] = None,
+     ) -> torch.Tensor:
+         stride = kernel_size if not stride else stride
+         padding = [0, 0] if padding is None else padding
+         ceil_mode = False if ceil_mode is None else ceil_mode
+         count_include_pad = True if count_include_pad is None else count_include_pad
+         # no-op: None already means "use the default divisor"
+         divisor_override = None if divisor_override is None else divisor_override
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+
+         args = [
+             NCHW_input,
+             kernel_size,
+             stride,
+             padding,
+             ceil_mode,
+             count_include_pad,
+             divisor_override,
+         ]
+         NCHW_output = torch.ops.aten.avg_pool2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::avgpool2d")
+     def _(
+         input_: torch.Tensor,
+         kernel_size: List[int],
+         stride: Optional[List[int]] = None,
+         padding: Optional[List[int]] = None,
+         ceil_mode: Optional[bool] = None,
+         count_include_pad: Optional[bool] = None,
+         divisor_override: Optional[int] = None,
+     ):
+         stride = kernel_size if not stride else stride
+         padding = [0, 0] if padding is None else padding
+         ceil_mode = False if ceil_mode is None else ceil_mode
+         count_include_pad = True if count_include_pad is None else count_include_pad
+         divisor_override = None if divisor_override is None else divisor_override
+
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+
+         args = [
+             NCHW_input,
+             kernel_size,
+             stride,
+             padding,
+             ceil_mode,
+             count_include_pad,
+             divisor_override,
+         ]
+         NCHW_output = torch.ops.aten.avg_pool2d.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+
+ def CircleInstanceNorm():
+     @custom_op("circle_custom::instance_norm", mutates_args=())
+     def instance_norm(
+         input_: torch.Tensor,
+         weight: Optional[torch.Tensor] = None,
+         bias: Optional[torch.Tensor] = None,
+         running_mean: Optional[torch.Tensor] = None,
+         running_var: Optional[torch.Tensor] = None,
+         use_input_stats: bool = False,
+         momentum: float = 0.1,
+         eps: float = 1e-05,
+         cudnn_enabled: bool = False,
+     ) -> torch.Tensor:
+         NHWC_to_NCHW = [0, 3, 1, 2]
+         NCHW_input = torch.ops.aten.permute.default(input_, NHWC_to_NCHW)
+
+         # running_mean/running_var, use_input_stats, and cudnn_enabled are
+         # passed as fixed values (None/False) regardless of the given arguments.
+         args = [NCHW_input, weight, bias, None, None, False, momentum, eps, False]
+         NCHW_output = torch.ops.aten.instance_norm.default(*args)
+         NCHW_to_NHWC = [0, 2, 3, 1]
+         NHWC_output = torch.ops.aten.permute.default(NCHW_output, NCHW_to_NHWC)
+
+         return NHWC_output
+
+     @register_fake("circle_custom::instance_norm")
+     def _(
+         input: FakeTensor,
+         weight: Optional[FakeTensor] = None,
+         bias: Optional[FakeTensor] = None,
+         running_mean: Optional[FakeTensor] = None,
+         running_var: Optional[FakeTensor] = None,
+         use_input_stats: bool = False,
+         momentum: float = 0.1,
+         eps: float = 1e-05,
+         cudnn_enabled: bool = False,
+     ):
+         # shape is preserved
+         return input.new_empty(input.size())
+
+
+ def CircleQuantizeMX():
+     # This operator performs fake quantization in the microscaling (MX) format.
+     # NOTE Why "quantize"_mx rather than "fake_quantize"_mx?
+     # To align with the function name in the microxcaling repo.
+     # https://github.com/microsoft/microxcaling/blob/v1.1.0/mx/mx_ops.py#L173
+     @custom_op("circle_custom::quantize_mx", mutates_args=())
+     def quantize_mx(
+         input_: torch.Tensor,
+         elem_format: str,
+         axis: int,
+         shared_exp_method: str = "max",
+         round: str = "nearest",
+     ) -> torch.Tensor:
+         if elem_format == "int8":
+             scale_bits = 8
+             block_size = 32
+         else:
+             raise RuntimeError(f"Unsupported elem_format in quantize_mx: {elem_format}")
+
+         result = _quantize_mx(
+             input_,
+             scale_bits=scale_bits,
+             elem_format=elem_format,
+             axes=[axis],
+             block_size=block_size,
+             shared_exp_method=shared_exp_method,
+             round=round,
+         )
+         return result
+
+     @register_fake("circle_custom::quantize_mx")
+     def _(
+         input_: torch.Tensor,
+         elem_format: str,
+         axis: int,
+         shared_exp_method: str = "max",  # Fixed
+         round: str = "nearest",  # Fixed
+     ) -> torch.Tensor:
+         return input_
+
+
+ # Add the custom ops to the torch namespace (torch.ops.circle_custom).
+ def RegisterOps():
+     CircleResizeNearestNeighbor()
+     CircleDepthwiseConv2d()
+     CircleDepthwiseConv2dPadding()
+     CircleConv2d()
+     CircleConv2dPadding()
+     CircleMaxPool2D()
+     CircleAvgPool2D()
+     CircleInstanceNorm()
+     CircleQuantizeMX()
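
For context, here is a minimal usage sketch (editorial; not part of the wheel). It assumes only what the file above shows: `RegisterOps()` registers each op under `torch.ops.circle_custom`, and the conv ops expect NHWC inputs and OHWI weights.

import torch
from tico.utils.register_custom_op import RegisterOps

RegisterOps()  # registers all circle_custom::* ops via torch.library

x = torch.randn(1, 8, 8, 4)   # NHWC input: N=1, H=W=8, C=4
w = torch.randn(16, 3, 3, 4)  # OHWI weight: O=16, kH=kW=3, I=4
y = torch.ops.circle_custom.conv2d(x, w)  # stride/padding/dilation/groups take their defaults
print(y.shape)  # torch.Size([1, 6, 6, 16])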
tico/utils/serialize.py
@@ -0,0 +1,42 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Optional
+
+ import torch
+
+ from tico.serialize.circle_graph import CircleSubgraph
+ from tico.utils.graph import get_module_name_chain
+
+
+ def finalise_tensor_names(
+     graph: CircleSubgraph,
+ ) -> None:
+     """
+     Replace every `tensor.name` with its *readable* version
+     **after** the graph is fully built.
+
+     Why late?
+     ---------
+     - All intermediate steps (add_input, add_output, get_tid…) rely on the
+       original technical names in the ExportedProgram.
+
+     The rewrite is *in-place* and touches **only** the `name` field of
+     each tensor.
+     """
+     assert hasattr(graph, "name_to_node")
+
+     for tensor in graph.tensors:
+         if tensor.name in graph.name_to_node:
+             tensor.name = f"{get_module_name_chain(graph.name_to_node[tensor.name])}::{tensor.name}"
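
A matching usage sketch (editorial; the surrounding serializer steps and the wrapper name `finish_graph` are assumptions): `finalise_tensor_names` is meant to run once, after the subgraph has been fully populated.

from tico.serialize.circle_graph import CircleSubgraph
from tico.utils.serialize import finalise_tensor_names

def finish_graph(graph: CircleSubgraph) -> None:
    # Call only after all inputs, outputs, and operators have been added,
    # since the builder APIs look tensors up by their original names.
    finalise_tensor_names(graph)
    # Each tensor whose name maps back to an FX node is now renamed in place
    # to "<module name chain>::<original name>".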