PyPI - tico - Versions diffs - 0.1.0__py3-none-any.whl - Mend

tico 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (206) hide show

tico/__init__.py +42 -0
tico/config/__init__.py +4 -0
tico/config/base.py +37 -0
tico/config/factory.py +41 -0
tico/config/v1.py +35 -0
tico/experimental/__init__.py +1 -0
tico/experimental/quantization/__init__.py +1 -0
tico/experimental/quantization/algorithm/__init__.py +1 -0
tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
tico/experimental/quantization/config.py +68 -0
tico/experimental/quantization/evaluation/__init__.py +1 -0
tico/experimental/quantization/evaluation/backend.py +20 -0
tico/experimental/quantization/evaluation/evaluate.py +223 -0
tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
tico/experimental/quantization/evaluation/metric.py +109 -0
tico/experimental/quantization/evaluation/utils.py +185 -0
tico/experimental/quantization/passes/__init__.py +1 -0
tico/experimental/quantization/passes/fold_quant_ops.py +154 -0
tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +345 -0
tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
tico/experimental/quantization/passes/quantize_bias.py +123 -0
tico/experimental/quantization/passes/remove_weight_dequant_op.py +177 -0
tico/experimental/quantization/public_interface.py +108 -0
tico/experimental/quantization/quantizer.py +71 -0
tico/interpreter/__init__.py +1 -0
tico/interpreter/infer.py +116 -0
tico/interpreter/interpreter.py +93 -0
tico/passes/__init__.py +1 -0
tico/passes/cast_aten_where_arg_type.py +191 -0
tico/passes/cast_mixed_type_args.py +187 -0
tico/passes/const_prop_pass.py +307 -0
tico/passes/convert_conv1d_to_conv2d.py +160 -0
tico/passes/convert_layout_op_to_reshape.py +85 -0
tico/passes/convert_repeat_to_expand_copy.py +89 -0
tico/passes/convert_to_relu6.py +181 -0
tico/passes/decompose_addmm.py +124 -0
tico/passes/decompose_batch_norm.py +192 -0
tico/passes/decompose_fake_quantize.py +134 -0
tico/passes/decompose_fake_quantize_tensor_qparams.py +294 -0
tico/passes/decompose_group_norm.py +275 -0
tico/passes/decompose_grouped_conv2d.py +209 -0
tico/passes/decompose_slice_scatter.py +169 -0
tico/passes/extract_dtype_kwargs.py +122 -0
tico/passes/fill_meta_val.py +57 -0
tico/passes/fuse_leading_unsqueeze_reshape.py +112 -0
tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
tico/passes/legalize_causal_mask_value.py +108 -0
tico/passes/legalize_predefined_layout_operators.py +386 -0
tico/passes/lower_pow2_to_mul.py +75 -0
tico/passes/lower_to_resize_nearest_neighbor.py +235 -0
tico/passes/lower_to_slice.py +230 -0
tico/passes/merge_consecutive_cat.py +80 -0
tico/passes/ops.py +78 -0
tico/passes/remove_nop.py +84 -0
tico/passes/remove_redundant_assert_nodes.py +51 -0
tico/passes/remove_redundant_expand.py +66 -0
tico/passes/remove_redundant_permute.py +122 -0
tico/passes/remove_redundant_reshape.py +436 -0
tico/passes/remove_redundant_slice.py +62 -0
tico/passes/remove_redundant_to_copy.py +86 -0
tico/passes/restore_linear.py +115 -0
tico/passes/segment_index_select.py +145 -0
tico/pt2_to_circle.py +105 -0
tico/serialize/__init__.py +1 -0
tico/serialize/circle_graph.py +319 -0
tico/serialize/circle_mapping.py +177 -0
tico/serialize/circle_serializer.py +240 -0
tico/serialize/operators/__init__.py +28 -0
tico/serialize/operators/hashable_opcode.py +43 -0
tico/serialize/operators/node_visitor.py +80 -0
tico/serialize/operators/op_abs.py +53 -0
tico/serialize/operators/op_add.py +69 -0
tico/serialize/operators/op_alias_copy.py +64 -0
tico/serialize/operators/op_any.py +150 -0
tico/serialize/operators/op_arange_start_step.py +61 -0
tico/serialize/operators/op_argmax.py +62 -0
tico/serialize/operators/op_avg_pool2d.py +192 -0
tico/serialize/operators/op_bmm.py +62 -0
tico/serialize/operators/op_cat.py +66 -0
tico/serialize/operators/op_clamp.py +126 -0
tico/serialize/operators/op_clone.py +71 -0
tico/serialize/operators/op_constant_pad_nd.py +72 -0
tico/serialize/operators/op_conv2d.py +186 -0
tico/serialize/operators/op_copy.py +164 -0
tico/serialize/operators/op_cos.py +59 -0
tico/serialize/operators/op_cumsum.py +95 -0
tico/serialize/operators/op_depthwise_conv2d.py +199 -0
tico/serialize/operators/op_dequantize_per_channel.py +82 -0
tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
tico/serialize/operators/op_div.py +62 -0
tico/serialize/operators/op_embedding.py +60 -0
tico/serialize/operators/op_eq.py +64 -0
tico/serialize/operators/op_exp.py +60 -0
tico/serialize/operators/op_expand.py +91 -0
tico/serialize/operators/op_full.py +48 -0
tico/serialize/operators/op_full_like.py +55 -0
tico/serialize/operators/op_ge.py +54 -0
tico/serialize/operators/op_gelu.py +59 -0
tico/serialize/operators/op_gt.py +54 -0
tico/serialize/operators/op_index.py +82 -0
tico/serialize/operators/op_index_select.py +64 -0
tico/serialize/operators/op_instance_norm.py +91 -0
tico/serialize/operators/op_leaky_relu.py +60 -0
tico/serialize/operators/op_linear.py +70 -0
tico/serialize/operators/op_log.py +53 -0
tico/serialize/operators/op_log1p.py +86 -0
tico/serialize/operators/op_logical_and.py +63 -0
tico/serialize/operators/op_logical_not.py +62 -0
tico/serialize/operators/op_lt.py +61 -0
tico/serialize/operators/op_max_dim.py +70 -0
tico/serialize/operators/op_max_pool2d_with_indices.py +155 -0
tico/serialize/operators/op_maximum.py +53 -0
tico/serialize/operators/op_mean.py +66 -0
tico/serialize/operators/op_minimum.py +53 -0
tico/serialize/operators/op_mm.py +177 -0
tico/serialize/operators/op_mul.py +99 -0
tico/serialize/operators/op_ne.py +54 -0
tico/serialize/operators/op_neg.py +59 -0
tico/serialize/operators/op_permute.py +65 -0
tico/serialize/operators/op_pow.py +141 -0
tico/serialize/operators/op_prelu.py +54 -0
tico/serialize/operators/op_quantize_per_tensor.py +79 -0
tico/serialize/operators/op_reciprocal.py +64 -0
tico/serialize/operators/op_relu.py +53 -0
tico/serialize/operators/op_relu6.py +52 -0
tico/serialize/operators/op_repeat.py +100 -0
tico/serialize/operators/op_reshape.py +73 -0
tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
tico/serialize/operators/op_rsqrt.py +53 -0
tico/serialize/operators/op_scalar_tensor.py +51 -0
tico/serialize/operators/op_select_copy.py +65 -0
tico/serialize/operators/op_sigmoid.py +56 -0
tico/serialize/operators/op_sin.py +53 -0
tico/serialize/operators/op_slice.py +155 -0
tico/serialize/operators/op_softmax.py +100 -0
tico/serialize/operators/op_split_with_sizes.py +99 -0
tico/serialize/operators/op_sqrt.py +55 -0
tico/serialize/operators/op_squeeze.py +73 -0
tico/serialize/operators/op_sub.py +71 -0
tico/serialize/operators/op_sum.py +63 -0
tico/serialize/operators/op_tanh.py +54 -0
tico/serialize/operators/op_to_copy.py +105 -0
tico/serialize/operators/op_unsqueeze.py +66 -0
tico/serialize/operators/op_view.py +74 -0
tico/serialize/operators/op_where.py +82 -0
tico/serialize/operators/utils.py +94 -0
tico/serialize/pack.py +35 -0
tico/serialize/quant_param.py +42 -0
tico/utils/__init__.py +1 -0
tico/utils/convert.py +296 -0
tico/utils/define.py +35 -0
tico/utils/diff_graph.py +181 -0
tico/utils/errors.py +35 -0
tico/utils/graph.py +282 -0
tico/utils/logging.py +45 -0
tico/utils/model.py +37 -0
tico/utils/mx/__init__.py +1 -0
tico/utils/mx/elemwise_ops.py +267 -0
tico/utils/mx/formats.py +125 -0
tico/utils/mx/mx_ops.py +270 -0
tico/utils/padding.py +47 -0
tico/utils/passes.py +76 -0
tico/utils/register_custom_op.py +609 -0
tico/utils/serialize.py +42 -0
tico/utils/trace_decorators.py +101 -0
tico/utils/utils.py +406 -0
tico/utils/validate_args_kwargs.py +1149 -0
tico-0.1.0.dist-info/LICENSE +241 -0
tico-0.1.0.dist-info/METADATA +354 -0
tico-0.1.0.dist-info/RECORD +206 -0
tico-0.1.0.dist-info/WHEEL +5 -0
tico-0.1.0.dist-info/entry_points.txt +3 -0
tico-0.1.0.dist-info/top_level.txt +1 -0

tico/passes/lower_to_resize_nearest_neighbor.py ADDED Viewed

@@ -0,0 +1,235 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Optional, TYPE_CHECKING
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from torch.export import ExportedProgram
+from tico.serialize.circle_mapping import extract_shape
+from tico.utils import logging
+from tico.utils.errors import NotYetSupportedError
+from tico.utils.graph import create_node
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import trace_graph_diff_on_pass
+from tico.utils.utils import is_target_node
+from tico.utils.validate_args_kwargs import IndexArgs, UpsampleNearest2DVecArgs
+@trace_graph_diff_on_pass
+class LowerToResizeNearestNeighbor(PassBase):
+    """
+    This pass lowers `aten.index` and `aten.upsample_nearest2d.vec` to `circle_custom.resize_nearest_neighbor` when it is possible.
+    Until torch 2.7, `torch.nn.functional.interpolate` is converted to `aten.index` op.
+        [BEFORE PASS]
+        input - aten.index - output
+        [AFTER PASS]
+        input - aten.permute(NCHW_to_NHWC) - circle_custom.resize_nearest_neighbor - aten.permute(NHWC_to_NCHW) - output
+    Since torch 2.8, `torch.nn.functional.interpolate` is converted to aten.upsample_nearest2d.vec` op.
+        [BEFORE PASS]
+        input - aten.upsample_nearest2d.vec - output
+        [AFTER PASS]
+        input - aten.permute(NCHW_to_NHWC) - circle_custom.resize_nearest_neighbor - aten.permute(NHWC_to_NCHW) - output
+    """
+    def __init__(self):
+        super().__init__()
+    def convert_index_to_resize_nearest_neighbor(
+        self, exported_program, node
+    ) -> Optional[torch.fx.Node]:
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        args = IndexArgs(*node.args, **node.kwargs)
+        input_tensor = args.input
+        indices = args.indices
+        # Only support 4-D tensor
+        if len(indices) != 4:
+            return None
+        # indices = [None, None, H index, W index]
+        N, C, H, W = indices
+        if N != None or C != None:
+            return None
+        if not isinstance(H, torch.fx.Node):
+            return None
+        if not isinstance(W, torch.fx.Node):
+            return None
+        constants_dict = exported_program.constants
+        if (H.name not in constants_dict) or (W.name not in constants_dict):
+            return None
+        H_index, W_index = constants_dict[H.name], constants_dict[W.name]
+        input_tensor_shape = extract_shape(input_tensor)
+        input_tensor_H, input_tensor_W = (
+            input_tensor_shape[2],
+            input_tensor_shape[3],
+        )
+        if H_index.size()[0] % input_tensor_H != 0:
+            return None
+        scale_factor = int(H_index.size()[0] / input_tensor_H)
+        # H and W should be resized with same ratio.
+        if scale_factor != W_index.size()[0] / input_tensor_W:
+            return None
+        expected_H_index = []
+        expected_W_index = []
+        # Please refer to above `_prop_tensor_constant1` constant in the example.
+        for i in range(input_tensor_H):
+            expected_H_index += [[i]] * scale_factor
+        # Please refer to above `_prop_tensor_constant0` constant in the example.
+        for i in range(input_tensor_W):
+            expected_W_index += [i] * scale_factor
+        if not torch.all(
+            torch.eq(H_index, torch.tensor(expected_H_index))
+        ) or not torch.all(torch.eq(W_index, torch.tensor(expected_W_index))):
+            return None
+        expected_shape = [
+            input_tensor_shape[0],
+            input_tensor_shape[1],
+            len(expected_H_index),
+            len(expected_W_index),
+        ]
+        assert expected_shape == list(extract_shape(node))
+        with graph.inserting_before(node):
+            nchw_to_nhwc = create_node(
+                graph,
+                torch.ops.aten.permute.default,
+                args=(input_tensor, [0, 2, 3, 1]),
+                origin=input_tensor,
+            )
+            resize_nearest_neighbor = create_node(
+                graph,
+                torch.ops.circle_custom.resize_nearest_neighbor,
+                args=(nchw_to_nhwc, [len(expected_H_index), len(expected_W_index)]),
+                origin=node,
+            )
+            nhwc_to_nchw = create_node(
+                graph,
+                torch.ops.aten.permute.default,
+                args=(resize_nearest_neighbor, [0, 3, 1, 2]),
+            )
+            node.replace_all_uses_with(nhwc_to_nchw, propagate_meta=True)
+        return resize_nearest_neighbor
+    def convert_upsample_nearest2d_to_resize_nearest_neighbor(
+        self, exported_program, node
+    ) -> Optional[torch.fx.Node]:
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        args = UpsampleNearest2DVecArgs(*node.args, **node.kwargs)
+        input_tensor = args.input
+        output_size = args.output_size
+        scale_factors = args.scale_factors
+        input_tensor_shape = extract_shape(input_tensor)
+        input_tensor_H, input_tensor_W = (
+            input_tensor_shape[2],
+            input_tensor_shape[3],
+        )
+        if output_size is not None:
+            raise NotYetSupportedError("output_size is not supported yet")
+        if scale_factors is None:
+            raise NotYetSupportedError("scale_factors is None")
+        # TODO Support output_size case. Currently only scale_factors case is supported.
+        assert (
+            isinstance(scale_factors[0], float)
+            and isinstance(scale_factors[1], float)
+            and scale_factors[0] > 0
+            and scale_factors[1] > 0
+        )
+        def close_enough(x, y, epsilon=1e-10):
+            return abs(x - y) < epsilon
+        expected_H = int(input_tensor_H * scale_factors[0])
+        if not close_enough(expected_H, input_tensor_H * scale_factors[0]):
+            raise NotYetSupportedError(
+                f"Cannot support input_tensor_H ({input_tensor_H}) with scaling factor ({scale_factors[0]})"
+            )
+        expected_W = int(input_tensor_W * scale_factors[1])
+        if not close_enough(expected_W, input_tensor_W * scale_factors[1]):
+            raise NotYetSupportedError(
+                f"Cannot support input_tensor_W ({input_tensor_W}) with scaling factor ({scale_factors[1]})"
+            )
+        with graph.inserting_before(node):
+            nchw_to_nhwc = create_node(
+                graph,
+                torch.ops.aten.permute.default,
+                args=(input_tensor, [0, 2, 3, 1]),
+                origin=input_tensor,
+            )
+            resize_nearest_neighbor = create_node(
+                graph,
+                torch.ops.circle_custom.resize_nearest_neighbor,
+                args=(nchw_to_nhwc, [expected_H, expected_W]),
+                origin=node,
+            )
+            nhwc_to_nchw = create_node(
+                graph,
+                torch.ops.aten.permute.default,
+                args=(resize_nearest_neighbor, [0, 3, 1, 2]),
+            )
+            node.replace_all_uses_with(nhwc_to_nchw, propagate_meta=True)
+            return resize_nearest_neighbor
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        modified = False
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        for node in graph.nodes:
+            if not is_target_node(
+                node,
+                [torch.ops.aten.index.Tensor, torch.ops.aten.upsample_nearest2d.vec],
+            ):
+                continue
+            resize_nearest_neighbor = None
+            if node.target == torch.ops.aten.index.Tensor:
+                resize_nearest_neighbor = self.convert_index_to_resize_nearest_neighbor(
+                    exported_program, node
+                )
+            elif node.target == torch.ops.aten.upsample_nearest2d.vec:
+                resize_nearest_neighbor = (
+                    self.convert_upsample_nearest2d_to_resize_nearest_neighbor(
+                        exported_program, node
+                    )
+                )
+            if resize_nearest_neighbor:
+                modified = True
+                logger.debug(
+                    f"{node.name} is replaced with {resize_nearest_neighbor.name} operator"
+                )
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(modified)

tico/passes/lower_to_slice.py ADDED Viewed

@@ -0,0 +1,230 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from torch._export.utils import (
+    get_buffer,
+    get_lifted_tensor_constant,
+    get_param,
+    is_buffer,
+    is_lifted_tensor_constant,
+    is_param,
+)
+from torch.export import ExportedProgram
+from tico.passes import ops
+from tico.serialize.circle_graph import extract_shape
+from tico.utils import logging
+from tico.utils.graph import create_node, is_single_value_tensor
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import trace_const_diff_on_pass
+from tico.utils.utils import is_target_node
+from tico.utils.validate_args_kwargs import IndexSelectArgs, SelectCopyIntArgs
+def passes():
+    """
+    This pass lowers aten.ops.select/selct_copy.int to aten.ops.slice.
+    We support only when it is index in args, which is a constant tensor.
+    Since the index in node'args isn't constant tensor, we can't support converting the below op list yet.
+    TODO Support below with const indices
+    - torch.ops.aten.embedding.default
+    - torch.ops.aten.index.Tensor
+    """
+    return [
+        LowerSelectCopyToSlice(),
+        LowerIndexSelectToSlice(),
+    ]
+@trace_const_diff_on_pass
+class LowerSelectCopyToSlice(PassBase):
+    """
+    [before]
+            input
+                |
+            select (tensor, dim, *index)
+                |
+            output
+    [after]
+            input
+                |
+            slice (input=tensor, dim=dim, start=index, end=index+1, step=1)
+                |
+            reshape (input=slice_copy, size=select_shape)
+                |
+            output
+    """
+    def __init__(self):
+        super().__init__()
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+        for node in graph.nodes:
+            if not is_target_node(node, ops.aten.select):
+                continue
+            args = SelectCopyIntArgs(*node.args, **node.kwargs)
+            input = args.input
+            dim = args.dim
+            index = args.index
+            input_shape = extract_shape(input)
+            if dim < 0:
+                dim = dim % len(input_shape)
+            start = index
+            end = index + 1
+            step = 1
+            slice_copy_args = (input, dim, start, end, step)
+            with graph.inserting_after(node):
+                # slice
+                slice_node = create_node(
+                    graph,
+                    torch.ops.aten.slice.Tensor,
+                    args=slice_copy_args,
+                    origin=node,
+                )
+                node_shape = extract_shape(node)
+            with graph.inserting_after(slice_node):
+                # reshape
+                reshape_args = (slice_node, list(node_shape))
+                reshape_node = create_node(
+                    graph,
+                    torch.ops.aten.reshape.default,
+                    args=reshape_args,
+                )
+                node.replace_all_uses_with(reshape_node, propagate_meta=True)
+            modified = True
+            logger.debug(
+                f"{node.name} is replaced with {slice_node.name} and {reshape_node.name} operators"
+            )
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(modified)
+@trace_const_diff_on_pass
+class LowerIndexSelectToSlice(PassBase):
+    """
+    [before]
+            input
+                |
+            index_select.default  (tensor, dim, *index)
+                |
+            output
+    [after]
+            input
+                |
+            slice (input=tensor, dim=dim, start=index, end=index+1, step=1)
+                |
+            reshape (input=slice_copy, size=select_shape)
+                |
+            output
+    """
+    def __init__(self):
+        super().__init__()
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+        for node in graph.nodes:
+            if not is_target_node(node, ops.aten.index_select):
+                continue
+            args = IndexSelectArgs(*node.args, **node.kwargs)
+            input = args.input
+            dim = args.dim
+            index = args.index
+            input_shape = extract_shape(input)
+            if dim < 0:
+                dim = dim % len(input_shape)
+            if isinstance(index, torch.fx.Node):
+                if is_lifted_tensor_constant(exported_program, index):
+                    index = get_lifted_tensor_constant(exported_program, index)  # type: ignore[assignment]
+                elif is_param(exported_program, index):
+                    index = get_param(exported_program, index)  # type: ignore[assignment]
+                elif is_buffer(exported_program, index):
+                    index = get_buffer(exported_program, index)  # type: ignore[assignment]
+                else:
+                    continue
+            if not isinstance(index, torch.Tensor):
+                continue
+            if not is_single_value_tensor(index):
+                # need to be lowered by LowerIndexSelect pass
+                continue
+            index_int = index.item()  # convert scalar tensor to int
+            start = index_int
+            end = index_int + 1
+            step = 1
+            slice_copy_args = (input, dim, start, end, step)
+            with graph.inserting_after(node):
+                # slice
+                slice_node = create_node(
+                    graph,
+                    torch.ops.aten.slice.Tensor,
+                    args=slice_copy_args,
+                    origin=node,
+                )
+                node_shape = extract_shape(node)
+            with graph.inserting_after(slice_node):
+                # reshape
+                reshape_args = (slice_node, list(node_shape))
+                reshape_node = create_node(
+                    graph,
+                    torch.ops.aten.reshape.default,
+                    args=reshape_args,
+                )
+                node.replace_all_uses_with(reshape_node, propagate_meta=True)
+            modified = True
+            logger.debug(
+                f"{node.name} is replaced with {slice_node.name} and {reshape_node.name} operators"
+            )
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(modified)

tico/passes/merge_consecutive_cat.py ADDED Viewed

@@ -0,0 +1,80 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from torch.export import ExportedProgram
+from tico.passes import ops
+from tico.utils import logging
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import trace_graph_diff_on_pass
+from tico.utils.utils import is_target_node
+from tico.utils.validate_args_kwargs import CatArgs
+@trace_graph_diff_on_pass
+class MergeConsecutiveCat(PassBase):
+    """
+    This pass merges consecutive `aten.cat` operators when they can be merged into single operator.
+    """
+    def __init__(self):
+        super().__init__()
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+        for cat in graph.nodes:
+            if not is_target_node(cat, ops.aten.cat):
+                continue
+            args = CatArgs(*cat.args, **cat.kwargs)  # type: ignore[arg-type]
+            inputs = args.tensors
+            dim = args.dim
+            new_inputs = []
+            for prev_cat in inputs:
+                new_inputs.append(prev_cat)
+                if not prev_cat.op == "call_function":
+                    continue
+                if not prev_cat.target in ops.aten.cat:
+                    continue
+                prev_args = CatArgs(*prev_cat.args, **prev_cat.kwargs)  # type: ignore[arg-type]
+                prev_inputs = prev_args.tensors
+                prev_dim = prev_args.dim
+                if not prev_dim == dim:
+                    continue
+                new_inputs.pop()
+                for prev_input in prev_inputs:
+                    new_inputs.append(prev_input)
+            if len(new_inputs) > len(inputs):
+                cat.args = (new_inputs, dim)
+                modified = True
+                logger.debug(
+                    f"Consecutive cat nodes before {cat.name} are merged into {cat.name}"
+                )
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(modified)

tico/passes/ops.py ADDED Viewed

@@ -0,0 +1,78 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+"""
+This module contains Op lists used for finding the target Ops in passes.
+This module was introduced to reduce code duplication.
+It should be guaranteed that Ops in the same list have the same input/output signature.
+"""
+class AtenOps:
+    def __init__(self):
+        # In alphabetical order
+        self.add = [torch.ops.aten.add.Tensor]
+        self.alias = [torch.ops.aten.alias.default, torch.ops.aten.alias_copy.default]
+        self.cat = [torch.ops.aten.cat.default]
+        self.clamp = [torch.ops.aten.clamp.default, torch.ops.aten.clamp.Tensor]
+        self.clone = [torch.ops.aten.clone.default]
+        self.conv2d = [
+            torch.ops.aten.conv2d.default,
+            torch.ops.aten.conv2d.padding,
+        ]
+        self.conv1d = [
+            torch.ops.aten.conv1d.default,
+            torch.ops.aten.conv1d.padding,
+        ]
+        self.detach = [
+            torch.ops.aten.detach_.default,
+            torch.ops.aten.detach.default,
+        ]
+        self.expand = [
+            torch.ops.aten.expand.default,
+            torch.ops.aten.expand_copy.default,
+        ]
+        self.index_select = [torch.ops.aten.index_select.default]
+        self.mean = [torch.ops.aten.mean.dim]
+        self.mul_scalar = [torch.ops.aten.mul.Scalar]
+        self.mul_tensor = [torch.ops.aten.mul.Tensor]
+        self.permute = [torch.ops.aten.permute.default]
+        self.reshape = [torch.ops.aten.reshape.default]
+        self.select = [torch.ops.aten.select_copy.int, torch.ops.aten.select.int]
+        self.slice = [torch.ops.aten.slice.Tensor, torch.ops.aten.slice_copy.Tensor]
+        self.softmax = [
+            torch.ops.aten._softmax.default,
+            torch.ops.aten._safe_softmax.default,
+        ]
+        self.squeeze = [torch.ops.aten.squeeze.dims, torch.ops.aten.squeeze_copy.dims]
+        self.to_copy = [
+            torch.ops.aten._to_copy.default,
+            torch.ops.aten.to.dtype,
+            torch.ops.aten.to.dtype_layout,
+        ]
+        self.unsqueeze = [
+            torch.ops.aten.unsqueeze.default,
+            torch.ops.aten.unsqueeze_copy.default,
+        ]
+        self.view = [
+            torch.ops.aten.view,
+            torch.ops.aten.view.default,
+            torch.ops.aten.view_copy.default,
+        ]
+aten = AtenOps()

tico/passes/remove_nop.py ADDED Viewed

@@ -0,0 +1,84 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from torch.export import ExportedProgram
+from tico.passes import ops
+from tico.utils import logging
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import trace_graph_diff_on_pass
+from tico.utils.utils import is_target_node
+@trace_graph_diff_on_pass
+class RemoveNop(PassBase):
+    """
+    Let's remove noops by propagation.
+    """
+    target_ops = (
+        [
+            torch.ops.prims.view_of.default,
+        ]
+        + ops.aten.alias
+        + ops.aten.clone
+        + ops.aten.detach
+        + [torch.ops.aten.lift_fresh_copy.default]
+    )
+    def __init__(self):
+        super().__init__()
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+        for node in graph.nodes:
+            if not is_target_node(node, RemoveNop.target_ops):
+                continue
+            # TODO Consider memory format
+            if node.target in ops.aten.clone and "memory_format" in node.kwargs:
+                if node.kwargs["memory_format"] not in [
+                    torch.preserve_format,
+                    # Converting non-contiguous layout to contiguous only updates
+                    # strides of tensor. This is not visible on circle, so we can
+                    # safely ignore this operation.
+                    torch.contiguous_format,
+                ]:
+                    continue
+            assert len(node.args) == 1
+            src = node.args[0]
+            assert isinstance(src, torch.fx.Node)
+            with graph.inserting_after(node):
+                node.replace_all_uses_with(src, propagate_meta=False)
+            modified = True
+            logger.debug(f"{node.name} is replaced with {src}")
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+        return PassResult(modified)