tico-0.1.0.dev250411-py3-none-any.whl
- tico/__init__.py +31 -0
- tico/config/__init__.py +4 -0
- tico/config/base.py +37 -0
- tico/config/factory.py +41 -0
- tico/config/v1.py +35 -0
- tico/experimental/__init__.py +1 -0
- tico/experimental/quantization/__init__.py +1 -0
- tico/experimental/quantization/algorithm/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
- tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
- tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
- tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
- tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
- tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
- tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
- tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
- tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
- tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
- tico/experimental/quantization/config.py +68 -0
- tico/experimental/quantization/evaluation/__init__.py +1 -0
- tico/experimental/quantization/evaluation/backend.py +20 -0
- tico/experimental/quantization/evaluation/evaluate.py +223 -0
- tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
- tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
- tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
- tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
- tico/experimental/quantization/evaluation/metric.py +109 -0
- tico/experimental/quantization/evaluation/utils.py +185 -0
- tico/experimental/quantization/passes/__init__.py +1 -0
- tico/experimental/quantization/passes/fold_quant_ops.py +97 -0
- tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +289 -0
- tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
- tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
- tico/experimental/quantization/passes/remove_weight_dequant_op.py +168 -0
- tico/experimental/quantization/public_interface.py +108 -0
- tico/experimental/quantization/quantizer.py +71 -0
- tico/interpreter/__init__.py +1 -0
- tico/interpreter/infer.py +116 -0
- tico/interpreter/interpreter.py +93 -0
- tico/passes/__init__.py +1 -0
- tico/passes/cast_aten_where_arg_type.py +185 -0
- tico/passes/cast_mixed_type_args.py +186 -0
- tico/passes/const_prop_pass.py +307 -0
- tico/passes/convert_conv1d_to_conv2d.py +151 -0
- tico/passes/convert_layout_op_to_reshape.py +84 -0
- tico/passes/convert_repeat_to_expand_copy.py +90 -0
- tico/passes/convert_to_relu6.py +180 -0
- tico/passes/decompose_addmm.py +127 -0
- tico/passes/decompose_batch_norm.py +198 -0
- tico/passes/decompose_fake_quantize.py +126 -0
- tico/passes/decompose_fake_quantize_tensor_qparams.py +270 -0
- tico/passes/decompose_group_norm.py +258 -0
- tico/passes/decompose_grouped_conv2d.py +202 -0
- tico/passes/decompose_slice_scatter.py +167 -0
- tico/passes/extract_dtype_kwargs.py +121 -0
- tico/passes/fill_meta_val.py +57 -0
- tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
- tico/passes/legalize_causal_mask_value.py +113 -0
- tico/passes/legalize_predefined_layout_operators.py +383 -0
- tico/passes/lower_pow2_to_mul.py +75 -0
- tico/passes/lower_to_resize_nearest_neighbor.py +249 -0
- tico/passes/lower_to_slice.py +112 -0
- tico/passes/merge_consecutive_cat.py +82 -0
- tico/passes/ops.py +75 -0
- tico/passes/remove_nop.py +85 -0
- tico/passes/remove_redundant_assert_nodes.py +50 -0
- tico/passes/remove_redundant_expand.py +70 -0
- tico/passes/remove_redundant_permute.py +102 -0
- tico/passes/remove_redundant_reshape.py +431 -0
- tico/passes/remove_redundant_slice.py +64 -0
- tico/passes/remove_redundant_to_copy.py +84 -0
- tico/passes/restore_linear.py +113 -0
- tico/passes/segment_index_select.py +143 -0
- tico/pt2_to_circle.py +101 -0
- tico/serialize/__init__.py +1 -0
- tico/serialize/circle_graph.py +264 -0
- tico/serialize/circle_mapping.py +177 -0
- tico/serialize/circle_serializer.py +232 -0
- tico/serialize/operators/__init__.py +28 -0
- tico/serialize/operators/hashable_opcode.py +43 -0
- tico/serialize/operators/node_visitor.py +80 -0
- tico/serialize/operators/op_add.py +69 -0
- tico/serialize/operators/op_alias_copy.py +64 -0
- tico/serialize/operators/op_any.py +142 -0
- tico/serialize/operators/op_arange_start_step.py +61 -0
- tico/serialize/operators/op_argmax.py +62 -0
- tico/serialize/operators/op_avg_pool2d.py +112 -0
- tico/serialize/operators/op_bmm.py +62 -0
- tico/serialize/operators/op_cat.py +66 -0
- tico/serialize/operators/op_clamp.py +123 -0
- tico/serialize/operators/op_clone.py +71 -0
- tico/serialize/operators/op_constant_pad_nd.py +72 -0
- tico/serialize/operators/op_conv2d.py +181 -0
- tico/serialize/operators/op_copy.py +162 -0
- tico/serialize/operators/op_cos.py +59 -0
- tico/serialize/operators/op_cumsum.py +92 -0
- tico/serialize/operators/op_depthwise_conv2d.py +198 -0
- tico/serialize/operators/op_dequantize_per_channel.py +82 -0
- tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
- tico/serialize/operators/op_div.py +62 -0
- tico/serialize/operators/op_embedding.py +60 -0
- tico/serialize/operators/op_eq.py +64 -0
- tico/serialize/operators/op_exp.py +60 -0
- tico/serialize/operators/op_expand.py +91 -0
- tico/serialize/operators/op_full.py +48 -0
- tico/serialize/operators/op_full_like.py +55 -0
- tico/serialize/operators/op_ge.py +54 -0
- tico/serialize/operators/op_gelu.py +59 -0
- tico/serialize/operators/op_gt.py +54 -0
- tico/serialize/operators/op_index.py +82 -0
- tico/serialize/operators/op_index_select.py +64 -0
- tico/serialize/operators/op_instance_norm.py +91 -0
- tico/serialize/operators/op_linear.py +70 -0
- tico/serialize/operators/op_log.py +53 -0
- tico/serialize/operators/op_log1p.py +83 -0
- tico/serialize/operators/op_logical_and.py +63 -0
- tico/serialize/operators/op_logical_not.py +62 -0
- tico/serialize/operators/op_lt.py +61 -0
- tico/serialize/operators/op_max_pool2d_with_indices.py +140 -0
- tico/serialize/operators/op_maximum.py +53 -0
- tico/serialize/operators/op_mean.py +66 -0
- tico/serialize/operators/op_minimum.py +53 -0
- tico/serialize/operators/op_mm.py +174 -0
- tico/serialize/operators/op_mul.py +99 -0
- tico/serialize/operators/op_ne.py +54 -0
- tico/serialize/operators/op_neg.py +59 -0
- tico/serialize/operators/op_permute.py +65 -0
- tico/serialize/operators/op_pow.py +138 -0
- tico/serialize/operators/op_prelu.py +54 -0
- tico/serialize/operators/op_quantize_per_tensor.py +79 -0
- tico/serialize/operators/op_reciprocal.py +64 -0
- tico/serialize/operators/op_relu.py +53 -0
- tico/serialize/operators/op_relu6.py +52 -0
- tico/serialize/operators/op_repeat.py +99 -0
- tico/serialize/operators/op_reshape.py +73 -0
- tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
- tico/serialize/operators/op_rsqrt.py +53 -0
- tico/serialize/operators/op_scalar_tensor.py +51 -0
- tico/serialize/operators/op_select_copy.py +65 -0
- tico/serialize/operators/op_sigmoid.py +56 -0
- tico/serialize/operators/op_sin.py +53 -0
- tico/serialize/operators/op_slice.py +155 -0
- tico/serialize/operators/op_softmax.py +100 -0
- tico/serialize/operators/op_split_with_sizes.py +96 -0
- tico/serialize/operators/op_sqrt.py +55 -0
- tico/serialize/operators/op_squeeze.py +73 -0
- tico/serialize/operators/op_sub.py +71 -0
- tico/serialize/operators/op_sum.py +63 -0
- tico/serialize/operators/op_tanh.py +54 -0
- tico/serialize/operators/op_to_copy.py +105 -0
- tico/serialize/operators/op_unsqueeze.py +66 -0
- tico/serialize/operators/op_view.py +74 -0
- tico/serialize/operators/op_where.py +82 -0
- tico/serialize/operators/utils.py +51 -0
- tico/serialize/pack.py +35 -0
- tico/serialize/quant_param.py +42 -0
- tico/utils/__init__.py +1 -0
- tico/utils/convert.py +292 -0
- tico/utils/define.py +35 -0
- tico/utils/diff_graph.py +181 -0
- tico/utils/errors.py +35 -0
- tico/utils/graph.py +200 -0
- tico/utils/logging.py +45 -0
- tico/utils/model.py +37 -0
- tico/utils/padding.py +47 -0
- tico/utils/passes.py +76 -0
- tico/utils/register_custom_op.py +562 -0
- tico/utils/trace_decorators.py +101 -0
- tico/utils/utils.py +314 -0
- tico/utils/validate_args_kwargs.py +1114 -0
- tico-0.1.0.dev250411.dist-info/LICENSE +241 -0
- tico-0.1.0.dev250411.dist-info/METADATA +17 -0
- tico-0.1.0.dev250411.dist-info/RECORD +196 -0
- tico-0.1.0.dev250411.dist-info/WHEEL +5 -0
- tico-0.1.0.dev250411.dist-info/entry_points.txt +3 -0
- tico-0.1.0.dev250411.dist-info/top_level.txt +1 -0
+++ tico/passes/decompose_grouped_conv2d.py
@@ -0,0 +1,202 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import torch.fx
import torch
from torch.export import ExportedProgram

from tico.passes import ops
from tico.serialize.circle_mapping import extract_shape
from tico.utils import logging
from tico.utils.errors import InvalidArgumentError, NotYetSupportedError
from tico.utils.graph import add_placeholder
from tico.utils.passes import PassBase, PassResult
from tico.utils.trace_decorators import trace_graph_diff_on_pass
from tico.utils.validate_args_kwargs import Conv2DArgs


@trace_graph_diff_on_pass
class DecomposeGroupedConv2d(PassBase):
    """
    This pass decomposes a grouped Conv2d operator into multiple Conv2d operators with groups=1.

    A grouped Conv2d is a Conv2d operator whose `groups` argument equals neither the
    input channel count nor 1.

    [before]

        input   weight   bias
          |        |       |
          +--------+-------+
                   |
                 Conv2d (groups != IN_CHANNEL && groups != 1)
                   |
                 output

    [after]

    The `slice` operators below slice the input tensor, weight, and bias along the
    channel axis into `groups` pieces. The numbered input, weight, and bias denote the
    sliced input tensor, weight, and bias, respectively.

        input
          |      weight
        slice       |       bias
          |       slice       |
          |         |       slice
          |         |         |
          +---------------------------+---------------------------+
          |         |         |       |                           |
          |         +---------------------------+---------------------------+
          |         |         |       |         |                 |         |
          |         |         +---------------------------+---------------------------+
          |         |         |       |         |         |       |         |         |
       input_1      |         |      ...        |         |    input_N      |         |
          |     weight_1      |       |        ...        |       |     weight_N      |
          |         |      bias_1     |         |        ...      |         |      bias_N
          +---------+---------+       +---------+---------+       +---------+---------+
                    |                           |                           |
                Conv2d_1                       ...                      Conv2d_N
                    |                           |                           |
                    +---------------------------+---------------------------+
                                                |
                                             concat
                                                |
                                             output
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        logger = logging.getLogger(__name__)

        gm = exported_program.graph_module
        graph: torch.fx.Graph = gm.graph
        modified = False

        for node in graph.nodes:
            if node.op != "call_function":
                continue
            if node.target not in ops.aten.conv2d:
                continue

            args = Conv2DArgs(*node.args)
            input_ = args.input
            weight = args.weight
            bias = args.bias
            stride = args.stride
            padding = args.padding
            dilation = args.dilation
            groups = args.groups

            input_shape = extract_shape(input_)
            if len(input_shape) != 4:
                raise NotYetSupportedError(
                    f"Only support 4D input tensor: node's input shape: {input_shape}"
                )

            in_channels = input_shape[1]
            if groups == 1 or groups == in_channels:
                continue
            assert (
                in_channels % groups == 0
            ), f"in_channels should be divisible by groups: in_channels: {in_channels}, groups: {groups}"

            output_shape = extract_shape(node)
            assert len(output_shape) == 4, len(output_shape)

            out_channels = output_shape[1]
            assert (
                out_channels % groups == 0
            ), f"out_channels should be divisible by groups: out_channels: {out_channels}, groups: {groups}"

            weight_shape = extract_shape(weight)
            assert len(weight_shape) == 4, len(weight_shape)
            assert (
                weight_shape[0] == out_channels
            ), f"weight shape[0]: {weight_shape[0]}, out channels: {out_channels}"
            assert (
                weight_shape[1] == in_channels // groups
            ), f"weight shape[1]: {weight_shape[1]}, in channels: {in_channels}"

            if bias is not None:
                bias_shape = extract_shape(bias)
                assert (
                    bias_shape[0] == out_channels
                ), f"bias shape[0]: {bias_shape[0]}, out channels: {out_channels}"
            else:  # Make a dummy bias tensor
                bias = add_placeholder(
                    exported_program, torch.zeros(out_channels), "bias"
                )

            group_size = in_channels // groups
            out_group_size = out_channels // groups

            with gm.graph.inserting_before(node):
                conv2d_op = None
                if isinstance(padding, list) and all(
                    isinstance(element, int) for element in padding
                ):
                    conv2d_op = torch.ops.aten.conv2d.default
                elif isinstance(padding, str):
                    conv2d_op = torch.ops.aten.conv2d.padding
                else:
                    raise InvalidArgumentError(
                        f"Unsupported padding type: {padding}"
                    )  # Unreachable

                conv2d_tensors = []
                for i in range(groups):
                    sliced_input = graph.call_function(
                        torch.ops.aten.slice.Tensor,
                        (input_, 1, group_size * i, group_size * (i + 1), 1),
                    )
                    sliced_weight = graph.call_function(
                        torch.ops.aten.slice.Tensor,
                        (weight, 0, out_group_size * i, out_group_size * (i + 1), 1),
                    )
                    sliced_bias = graph.call_function(
                        torch.ops.aten.slice.Tensor,
                        (bias, 0, out_group_size * i, out_group_size * (i + 1), 1),
                    )
                    conv2d_tensor = graph.call_function(
                        conv2d_op,
                        (
                            sliced_input,
                            sliced_weight,
                            sliced_bias,
                            stride,
                            padding,
                            dilation,
                            1,
                        ),
                    )
                    conv2d_tensors.append(conv2d_tensor)

                concat_output = graph.call_function(
                    torch.ops.aten.cat.default, (conv2d_tensors, 1)
                )

                node.replace_all_uses_with(concat_output, propagate_meta=True)

            modified = True
            logger.debug(
                f"{node.name} is replaced with groups of conv2d: The number of groups: {groups}, group size: {group_size}"
            )

        graph.eliminate_dead_code()
        gm.recompile()
        return PassResult(modified)
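As a sanity check on the rewrite above, here is a minimal sketch in plain PyTorch (independent of tico; the shapes, values, and group count are made-up examples) that compares a grouped conv2d against the per-group slice + conv2d + concat sequence the pass emits:

import torch

x = torch.randn(1, 8, 16, 16)       # N=1, C_in=8
weight = torch.randn(12, 4, 3, 3)   # C_out=12, C_in/groups=4
bias = torch.randn(12)
groups = 2                          # groups != 1 and groups != C_in

grouped = torch.ops.aten.conv2d.default(
    x, weight, bias, [1, 1], [1, 1], [1, 1], groups
)

# Decompose: slice input along dim 1, weight/bias along dim 0,
# run groups=1 convolutions, then concatenate along the channel axis.
in_g, out_g = x.shape[1] // groups, weight.shape[0] // groups
parts = []
for i in range(groups):
    xi = torch.ops.aten.slice.Tensor(x, 1, in_g * i, in_g * (i + 1), 1)
    wi = torch.ops.aten.slice.Tensor(weight, 0, out_g * i, out_g * (i + 1), 1)
    bi = torch.ops.aten.slice.Tensor(bias, 0, out_g * i, out_g * (i + 1), 1)
    parts.append(
        torch.ops.aten.conv2d.default(xi, wi, bi, [1, 1], [1, 1], [1, 1], 1)
    )
decomposed = torch.ops.aten.cat.default(parts, 1)

torch.testing.assert_close(grouped, decomposed)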
+++ tico/passes/decompose_slice_scatter.py
@@ -0,0 +1,167 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    import torch.fx
import torch
from torch.export import ExportedProgram

from tico.serialize.circle_mapping import extract_shape

from tico.utils import logging
from tico.utils.passes import PassBase, PassResult
from tico.utils.trace_decorators import trace_graph_diff_on_pass
from tico.utils.utils import enforce_type


@trace_graph_diff_on_pass
class DecomposeSliceScatter(PassBase):
    """
    Let's decompose slice_scatter.default into cat.

    slice_scatter with step=1 embeds the src tensor into the input tensor.
    We can replace it with (1) slicing the input tensor and (2) concatenating all tensors.

    [1] When step = 1,

    (1) Split input into input_0 and input_1 (either of them can be zero-size)
    (2) Concatenate input_0, src, input_1

    Before)

        input                  src
          |                     |
          |                     |
          |                     |
          +--> slice_scatter <--+

    After)

        input
          |------------------------
          |                       |
          |                       |
          |                       |
      slice_copy              slice_copy
          |                       |
          |                       |
          |                       |
       slice_0*      src       slice_1*
          |           |           |
          |           |           |
          |           |           |
          +--------> cat <--------+

    *Either slice_0 or slice_1 could be empty; an empty slice is ignored.

    [2] When step > 1: not supported yet. (TBD)
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph
        modified = False

        for node in graph.nodes:
            if node.op != "call_function":
                continue
            if node.target != torch.ops.aten.slice_scatter.default:
                continue

            @enforce_type
            @dataclass
            class Args:
                """
                input (Tensor): the input tensor.
                src (Tensor): the tensor to embed into input.
                dim (int): the dimension to insert the slice into.
                start (Optional[int]): the start index of where to insert the slice.
                end (Optional[int]): the end index of where to insert the slice.
                step (int): how many elements to skip.
                """

                input: torch.fx.Node
                src: torch.fx.Node
                dim: int = 0
                start: Optional[int] = None
                end: Optional[int] = None
                step: int = 1

            args = Args(*node.args, **node.kwargs)  # type: ignore[arg-type]

            input = args.input
            src = args.src
            dim = args.dim
            s = args.start
            e = args.end
            step = args.step

            # TODO Support step > 1 cases
            if step > 1:
                raise RuntimeError(
                    f"slice_scatter with step > 1 is not yet supported. Node: {node}"
                )

            start: int = 0 if s is None else s
            end: int = (
                extract_shape(src)[dim]
                if e is None
                else min(extract_shape(src)[dim], e)
            )

            with graph.inserting_before(node):
                slices = []

                if 0 < start:
                    slice_0 = graph.call_function(
                        torch.ops.aten.slice_copy.Tensor,
                        args=(input, dim, 0, start, 1),
                    )
                    slices.append(slice_0)

                slices.append(src)

                if start + end < extract_shape(input)[dim]:
                    slice_1 = graph.call_function(
                        torch.ops.aten.slice_copy.Tensor,
                        args=(
                            input,
                            dim,
                            start + end,
                            extract_shape(input)[dim],
                            1,
                        ),
                    )
                    slices.append(slice_1)

                concat = graph.call_function(
                    torch.ops.aten.cat.default, args=(slices, dim)
                )
                # Do not set meta here; it is propagated from the replaced node.
                node.replace_all_uses_with(concat, propagate_meta=True)

            modified = True
            logger.debug(f"{node.name} is replaced with slice_copy + concat")

        graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(modified)
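A minimal sketch in plain PyTorch (independent of tico; the shapes and the insertion point are made-up) checking the step=1 rewrite: slice_scatter equals prefix slice + src + suffix slice, concatenated:

import torch

inp = torch.randn(6, 4)
src = torch.randn(3, 4)
dim, start = 0, 2
end = start + src.shape[dim]  # src occupies rows [start, end)

scattered = torch.ops.aten.slice_scatter.default(inp, src, dim, start, end, 1)

pieces = []
if start > 0:  # prefix of the input before the scattered region
    pieces.append(torch.ops.aten.slice_copy.Tensor(inp, dim, 0, start, 1))
pieces.append(src)
if end < inp.shape[dim]:  # suffix of the input after the scattered region
    pieces.append(
        torch.ops.aten.slice_copy.Tensor(inp, dim, end, inp.shape[dim], 1)
    )
rebuilt = torch.ops.aten.cat.default(pieces, dim)

torch.testing.assert_close(scattered, rebuilt)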
+++ tico/passes/extract_dtype_kwargs.py
@@ -0,0 +1,121 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import torch.fx
import torch
from torch.export import ExportedProgram
from torch.utils import _pytree as pytree

from tico.utils import logging
from tico.utils.passes import PassBase, PassResult
from tico.utils.trace_decorators import trace_graph_diff_on_pass


def _extract_to_output(node: torch.fx.Node, graph: torch.fx.Graph) -> bool:
    """
    This extracts the dtype kwarg toward the node's output.

    So, op(..., dtype=X) is converted to op(...).to(X).

    Returns True if modified.

    NOTE

    [1] This function always returns True. The return value is introduced for extension.
    [2] This conversion is not safe for some ops whose inputs should also be cast to X (e.g. Mean).
    """
    logger = logging.getLogger(__name__)

    node_kwargs = node.kwargs
    # Remove "dtype" from node's kwargs
    new_kwargs = {}
    for k, v in node_kwargs.items():
        if k == "dtype":
            continue
        new_kwargs[k] = v
    node.kwargs = new_kwargs
    # Create a new val for the node.
    # `node.target()` accepts only `Tensor` arguments, so retrieve the `FakeTensor`
    # wherever an argument is a `torch.fx.Node`.
    args, kwargs = pytree.tree_map_only(
        torch.fx.Node, lambda x: x.meta["val"], (node.args, node.kwargs)
    )
    new_val = node.target(*args, **kwargs)  # type: ignore[operator]
    # Set args and kwargs of `to_copy`
    to_args = (node,)
    to_kwargs = {"dtype": node_kwargs["dtype"]}
    with graph.inserting_after(node):
        to_copy = graph.call_function(torch.ops.aten._to_copy.default, (), {})
        node.replace_all_uses_with(to_copy, propagate_meta=True)
        # Q) Why lazily update the args and kwargs of `to_copy`?
        # A) `replace_all_uses_with` replaces all uses of `node`. If the args of
        #    `to_copy` were set to (node,) before `replace_all_uses_with`, the call
        #    would even replace the args of `to_copy` with `to_copy` itself.
        to_copy.args = to_args
        to_copy.kwargs = to_kwargs
        # Update meta["val"] to change the dtype
        node.meta["val"] = new_val

    logger.debug(f"{node.name}'s dtype kwarg is extracted into {to_copy.name}")

    return True


@trace_graph_diff_on_pass
class ExtractDtypeKwargsPass(PassBase):
    """
    This pass extracts the "dtype" keyword argument from nodes.

    Some torch APIs take a "dtype" keyword argument.

    E.g. x_bool = torch.full_like(x, 0, dtype=torch.bool)

    But this argument complicates the circle build logic because many operators share
    their inputs' type.

    So, this pass changes `op(dtype)` to `op + to(dtype)`.

    NOTE

    [1] Some ops naturally carry a "dtype" kwarg. The pass is not applied to those ops.
    [2] If node.kwargs["dtype"] is redundant, i.e. `op(dtype).dtype == op().dtype`, the pass is not applied.
    """

    def __init__(self):
        super().__init__()
        # Ops whose "dtype" kwarg is extracted
        self.target_ops = dict()
        self.target_ops[torch.ops.aten.full_like.default] = _extract_to_output

    def call(self, exported_program: ExportedProgram) -> PassResult:
        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph
        modified = False
        for node in graph.nodes:
            if node.op != "call_function" or node.target not in self.target_ops:
                continue
            if "dtype" not in node.kwargs:
                continue

            modified |= self.target_ops[node.target](node, graph)

        graph.eliminate_dead_code()
        graph.lint()
        graph_module.recompile()

        return PassResult(modified)
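The rewrite is easy to reproduce outside the pass. A minimal sketch in plain PyTorch (independent of tico; the full_like call mirrors the docstring example) showing that extracting the dtype kwarg into a trailing _to_copy preserves the result:

import torch

x = torch.randn(2, 3)

# op(..., dtype=X)
with_kwarg = torch.ops.aten.full_like.default(x, 0, dtype=torch.bool)

# op(...) followed by _to_copy(dtype=X), as the pass rewrites it
extracted = torch.ops.aten._to_copy.default(
    torch.ops.aten.full_like.default(x, 0), dtype=torch.bool
)

assert with_kwarg.dtype == extracted.dtype == torch.bool
torch.testing.assert_close(with_kwarg, extracted)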
+++ tico/passes/fill_meta_val.py
@@ -0,0 +1,57 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from torch.export import ExportedProgram

from tico.utils import logging
from tico.utils.passes import PassBase, PassResult
from tico.utils.trace_decorators import trace_graph_diff_on_pass
from tico.utils.utils import set_new_meta_val


@trace_graph_diff_on_pass
class FillMetaVal(PassBase):
    """
    Let's set a new meta['val'] for nodes which don't have meta['val'].
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph = graph_module.graph
        modified = False
        # Make sure the graph is topologically sorted
        graph.lint()
        for node in graph.nodes:
            if node.op != "call_function":
                continue

            if hasattr(node, "meta") and "val" in node.meta:
                continue

            set_new_meta_val(node)

            modified = True

            logger.debug(f"{node.name} has new meta values.")

        graph.eliminate_dead_code()
        graph.lint()
        graph_module.recompile()

        return PassResult(modified)
+++ tico/passes/fuse_redundant_reshape_to_mean.py
@@ -0,0 +1,102 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    import torch.fx
import torch
from torch.export import ExportedProgram
from torch.utils import _pytree as pytree

from tico.passes import ops
from tico.serialize.circle_mapping import extract_shape
from tico.utils import logging
from tico.utils.passes import PassBase, PassResult
from tico.utils.trace_decorators import trace_graph_diff_on_pass


@trace_graph_diff_on_pass
class FuseRedundantReshapeToMean(PassBase):
    """
    This pass removes redundant `aten.reshape` operators that can be fused into `aten.mean` with `keep_dims`.

    Shape(aten.reshape(aten.mean(input))) == Shape(aten.mean(input, keep_dims=True))
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph = graph_module.graph
        modified = False
        for node in graph.nodes:
            if node.op != "call_function":
                continue

            if node.target != torch.ops.aten.mean.dim:
                continue

            # If mean is used by other nodes as well, do not fuse it.
            if len(node.users) != 1:
                continue

            user_node = next(iter(node.users))
            if user_node.target not in ops.aten.reshape:
                continue

            mean_args, mean_kwargs = pytree.tree_map_only(
                torch.fx.Node,
                lambda n: n.meta["val"],
                (node.args, node.kwargs),
            )
            # The signature of aten.mean.dim is as follows.
            #   mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
            # `keepdim` in `node.kwargs` is moved to `node.args` in `run_decompositions`.
            # `dtype` in `node.kwargs` is not moved.
            assert len(mean_args) == 3 or len(mean_args) == 2  # keepdim exists or not
            assert len(mean_kwargs) <= 1  # dtype exists or not
            fused_mean_args = mean_args
            keep_dims = True
            if len(mean_args) == 2:
                fused_mean_args += (keep_dims,)

            fused_val = node.target(*fused_mean_args, **mean_kwargs)

            # Check that both shapes are the same:
            # 1. Shape(aten.reshape(aten.mean))
            # 2. Shape(aten.mean(keep_dims=True))
            if fused_val.size() != extract_shape(user_node):
                continue

            # Update args
            if len(mean_args) == 2:
                updated_args = node.args + (keep_dims,)
            elif len(mean_args) == 3:
                updated_args = node.args
            node.args = updated_args
            node.meta["val"] = fused_val
            user_node.replace_all_uses_with(node, propagate_meta=False)

            modified = True
            logger.debug(f"{user_node.name} is replaced with {node.name}")

        graph.eliminate_dead_code()
        graph.lint()
        graph_module.recompile()

        return PassResult(modified)
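A minimal sketch in plain PyTorch (independent of tico; the shapes are made-up) showing the equivalence the pass relies on: reshape(mean(x, dim)) matches mean(x, dim, keepdim=True) whenever the reshape only re-inserts the reduced axis:

import torch

x = torch.randn(2, 3, 4)

# reshape(mean(x, dim=[1])): reduced shape [2, 4] reshaped back to [2, 1, 4]
reshaped = torch.ops.aten.reshape.default(
    torch.ops.aten.mean.dim(x, [1]), [2, 1, 4]
)

# mean(x, dim=[1], keepdim=True): what the pass fuses the pair into
fused = torch.ops.aten.mean.dim(x, [1], True)

torch.testing.assert_close(reshaped, fused)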