tico-0.1.0.dev250411-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. tico/__init__.py +31 -0
  2. tico/config/__init__.py +4 -0
  3. tico/config/base.py +37 -0
  4. tico/config/factory.py +41 -0
  5. tico/config/v1.py +35 -0
  6. tico/experimental/__init__.py +1 -0
  7. tico/experimental/quantization/__init__.py +1 -0
  8. tico/experimental/quantization/algorithm/__init__.py +1 -0
  9. tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
  10. tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
  11. tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
  12. tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
  13. tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
  14. tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
  15. tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
  16. tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
  17. tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
  18. tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
  19. tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
  20. tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
  21. tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
  22. tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
  23. tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
  24. tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
  25. tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
  26. tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
  27. tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
  28. tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
  29. tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
  30. tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
  31. tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
  32. tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
  33. tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
  34. tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
  35. tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
  36. tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
  37. tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
  38. tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
  39. tico/experimental/quantization/config.py +68 -0
  40. tico/experimental/quantization/evaluation/__init__.py +1 -0
  41. tico/experimental/quantization/evaluation/backend.py +20 -0
  42. tico/experimental/quantization/evaluation/evaluate.py +223 -0
  43. tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
  44. tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
  45. tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
  46. tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
  47. tico/experimental/quantization/evaluation/metric.py +109 -0
  48. tico/experimental/quantization/evaluation/utils.py +185 -0
  49. tico/experimental/quantization/passes/__init__.py +1 -0
  50. tico/experimental/quantization/passes/fold_quant_ops.py +97 -0
  51. tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +289 -0
  52. tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
  53. tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
  54. tico/experimental/quantization/passes/remove_weight_dequant_op.py +168 -0
  55. tico/experimental/quantization/public_interface.py +108 -0
  56. tico/experimental/quantization/quantizer.py +71 -0
  57. tico/interpreter/__init__.py +1 -0
  58. tico/interpreter/infer.py +116 -0
  59. tico/interpreter/interpreter.py +93 -0
  60. tico/passes/__init__.py +1 -0
  61. tico/passes/cast_aten_where_arg_type.py +185 -0
  62. tico/passes/cast_mixed_type_args.py +186 -0
  63. tico/passes/const_prop_pass.py +307 -0
  64. tico/passes/convert_conv1d_to_conv2d.py +151 -0
  65. tico/passes/convert_layout_op_to_reshape.py +84 -0
  66. tico/passes/convert_repeat_to_expand_copy.py +90 -0
  67. tico/passes/convert_to_relu6.py +180 -0
  68. tico/passes/decompose_addmm.py +127 -0
  69. tico/passes/decompose_batch_norm.py +198 -0
  70. tico/passes/decompose_fake_quantize.py +126 -0
  71. tico/passes/decompose_fake_quantize_tensor_qparams.py +270 -0
  72. tico/passes/decompose_group_norm.py +258 -0
  73. tico/passes/decompose_grouped_conv2d.py +202 -0
  74. tico/passes/decompose_slice_scatter.py +167 -0
  75. tico/passes/extract_dtype_kwargs.py +121 -0
  76. tico/passes/fill_meta_val.py +57 -0
  77. tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
  78. tico/passes/legalize_causal_mask_value.py +113 -0
  79. tico/passes/legalize_predefined_layout_operators.py +383 -0
  80. tico/passes/lower_pow2_to_mul.py +75 -0
  81. tico/passes/lower_to_resize_nearest_neighbor.py +249 -0
  82. tico/passes/lower_to_slice.py +112 -0
  83. tico/passes/merge_consecutive_cat.py +82 -0
  84. tico/passes/ops.py +75 -0
  85. tico/passes/remove_nop.py +85 -0
  86. tico/passes/remove_redundant_assert_nodes.py +50 -0
  87. tico/passes/remove_redundant_expand.py +70 -0
  88. tico/passes/remove_redundant_permute.py +102 -0
  89. tico/passes/remove_redundant_reshape.py +431 -0
  90. tico/passes/remove_redundant_slice.py +64 -0
  91. tico/passes/remove_redundant_to_copy.py +84 -0
  92. tico/passes/restore_linear.py +113 -0
  93. tico/passes/segment_index_select.py +143 -0
  94. tico/pt2_to_circle.py +101 -0
  95. tico/serialize/__init__.py +1 -0
  96. tico/serialize/circle_graph.py +264 -0
  97. tico/serialize/circle_mapping.py +177 -0
  98. tico/serialize/circle_serializer.py +232 -0
  99. tico/serialize/operators/__init__.py +28 -0
  100. tico/serialize/operators/hashable_opcode.py +43 -0
  101. tico/serialize/operators/node_visitor.py +80 -0
  102. tico/serialize/operators/op_add.py +69 -0
  103. tico/serialize/operators/op_alias_copy.py +64 -0
  104. tico/serialize/operators/op_any.py +142 -0
  105. tico/serialize/operators/op_arange_start_step.py +61 -0
  106. tico/serialize/operators/op_argmax.py +62 -0
  107. tico/serialize/operators/op_avg_pool2d.py +112 -0
  108. tico/serialize/operators/op_bmm.py +62 -0
  109. tico/serialize/operators/op_cat.py +66 -0
  110. tico/serialize/operators/op_clamp.py +123 -0
  111. tico/serialize/operators/op_clone.py +71 -0
  112. tico/serialize/operators/op_constant_pad_nd.py +72 -0
  113. tico/serialize/operators/op_conv2d.py +181 -0
  114. tico/serialize/operators/op_copy.py +162 -0
  115. tico/serialize/operators/op_cos.py +59 -0
  116. tico/serialize/operators/op_cumsum.py +92 -0
  117. tico/serialize/operators/op_depthwise_conv2d.py +198 -0
  118. tico/serialize/operators/op_dequantize_per_channel.py +82 -0
  119. tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
  120. tico/serialize/operators/op_div.py +62 -0
  121. tico/serialize/operators/op_embedding.py +60 -0
  122. tico/serialize/operators/op_eq.py +64 -0
  123. tico/serialize/operators/op_exp.py +60 -0
  124. tico/serialize/operators/op_expand.py +91 -0
  125. tico/serialize/operators/op_full.py +48 -0
  126. tico/serialize/operators/op_full_like.py +55 -0
  127. tico/serialize/operators/op_ge.py +54 -0
  128. tico/serialize/operators/op_gelu.py +59 -0
  129. tico/serialize/operators/op_gt.py +54 -0
  130. tico/serialize/operators/op_index.py +82 -0
  131. tico/serialize/operators/op_index_select.py +64 -0
  132. tico/serialize/operators/op_instance_norm.py +91 -0
  133. tico/serialize/operators/op_linear.py +70 -0
  134. tico/serialize/operators/op_log.py +53 -0
  135. tico/serialize/operators/op_log1p.py +83 -0
  136. tico/serialize/operators/op_logical_and.py +63 -0
  137. tico/serialize/operators/op_logical_not.py +62 -0
  138. tico/serialize/operators/op_lt.py +61 -0
  139. tico/serialize/operators/op_max_pool2d_with_indices.py +140 -0
  140. tico/serialize/operators/op_maximum.py +53 -0
  141. tico/serialize/operators/op_mean.py +66 -0
  142. tico/serialize/operators/op_minimum.py +53 -0
  143. tico/serialize/operators/op_mm.py +174 -0
  144. tico/serialize/operators/op_mul.py +99 -0
  145. tico/serialize/operators/op_ne.py +54 -0
  146. tico/serialize/operators/op_neg.py +59 -0
  147. tico/serialize/operators/op_permute.py +65 -0
  148. tico/serialize/operators/op_pow.py +138 -0
  149. tico/serialize/operators/op_prelu.py +54 -0
  150. tico/serialize/operators/op_quantize_per_tensor.py +79 -0
  151. tico/serialize/operators/op_reciprocal.py +64 -0
  152. tico/serialize/operators/op_relu.py +53 -0
  153. tico/serialize/operators/op_relu6.py +52 -0
  154. tico/serialize/operators/op_repeat.py +99 -0
  155. tico/serialize/operators/op_reshape.py +73 -0
  156. tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
  157. tico/serialize/operators/op_rsqrt.py +53 -0
  158. tico/serialize/operators/op_scalar_tensor.py +51 -0
  159. tico/serialize/operators/op_select_copy.py +65 -0
  160. tico/serialize/operators/op_sigmoid.py +56 -0
  161. tico/serialize/operators/op_sin.py +53 -0
  162. tico/serialize/operators/op_slice.py +155 -0
  163. tico/serialize/operators/op_softmax.py +100 -0
  164. tico/serialize/operators/op_split_with_sizes.py +96 -0
  165. tico/serialize/operators/op_sqrt.py +55 -0
  166. tico/serialize/operators/op_squeeze.py +73 -0
  167. tico/serialize/operators/op_sub.py +71 -0
  168. tico/serialize/operators/op_sum.py +63 -0
  169. tico/serialize/operators/op_tanh.py +54 -0
  170. tico/serialize/operators/op_to_copy.py +105 -0
  171. tico/serialize/operators/op_unsqueeze.py +66 -0
  172. tico/serialize/operators/op_view.py +74 -0
  173. tico/serialize/operators/op_where.py +82 -0
  174. tico/serialize/operators/utils.py +51 -0
  175. tico/serialize/pack.py +35 -0
  176. tico/serialize/quant_param.py +42 -0
  177. tico/utils/__init__.py +1 -0
  178. tico/utils/convert.py +292 -0
  179. tico/utils/define.py +35 -0
  180. tico/utils/diff_graph.py +181 -0
  181. tico/utils/errors.py +35 -0
  182. tico/utils/graph.py +200 -0
  183. tico/utils/logging.py +45 -0
  184. tico/utils/model.py +37 -0
  185. tico/utils/padding.py +47 -0
  186. tico/utils/passes.py +76 -0
  187. tico/utils/register_custom_op.py +562 -0
  188. tico/utils/trace_decorators.py +101 -0
  189. tico/utils/utils.py +314 -0
  190. tico/utils/validate_args_kwargs.py +1114 -0
  191. tico-0.1.0.dev250411.dist-info/LICENSE +241 -0
  192. tico-0.1.0.dev250411.dist-info/METADATA +17 -0
  193. tico-0.1.0.dev250411.dist-info/RECORD +196 -0
  194. tico-0.1.0.dev250411.dist-info/WHEEL +5 -0
  195. tico-0.1.0.dev250411.dist-info/entry_points.txt +3 -0
  196. tico-0.1.0.dev250411.dist-info/top_level.txt +1 -0
tico/passes/lower_to_resize_nearest_neighbor.py ADDED
@@ -0,0 +1,249 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import torch.fx
+ from typing import Optional
+
+ import torch
+ from torch.export import ExportedProgram
+
+ from tico.serialize.circle_mapping import extract_shape
+ from tico.utils import logging
+ from tico.utils.errors import NotYetSupportedError
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
+ from tico.utils.validate_args_kwargs import IndexArgs, UpsampleNearest2DVecArgs
+
+
+ @trace_graph_diff_on_pass
+ class LowerToResizeNearestNeighbor(PassBase):
+     """
+     This pass lowers `aten.index` and `aten.upsample_nearest2d.vec` to `circle_custom.resize_nearest_neighbor` when possible.
+
+     Until torch 2.7, `torch.nn.functional.interpolate` is converted to the `aten.index` op.
+
+     [EXAMPLE]
+     class InterpolateDouble(torch.nn.Module):
+         def __init__(self):
+             super().__init__()
+
+         def forward(self, x):
+             return torch.nn.functional.interpolate(x, scale_factor=2.0, mode="nearest")
+
+         def get_example_inputs(self):
+             return (torch.randn(1, 2, 3, 4),)
+
+     [EXPORTED GRAPH]
+     [constants]
+     _prop_tensor_constant0 = tensor([0, 0, 1, 1, 2, 2, 3, 3])
+     _prop_tensor_constant1 = tensor([[0], [0], [1], [1], [2], [2]])
+
+     [graph]
+     %_prop_tensor_constant0 : [num_users=1] = placeholder[target=_prop_tensor_constant0]
+     %_prop_tensor_constant1 : [num_users=1] = placeholder[target=_prop_tensor_constant1]
+     %x : [num_users=1] = placeholder[target=x]
+     %_to_copy : [num_users=1] = call_function[target=torch.ops.aten._to_copy.default](args = (%x,), kwargs = {dtype: torch.float32})
+     %index : [num_users=1] = call_function[target=torch.ops.aten.index.Tensor](args = (%_to_copy, [None, None, %_prop_tensor_constant1, %_prop_tensor_constant0]), kwargs = {})
+     %_to_copy_3 : [num_users=1] = call_function[target=torch.ops.aten._to_copy.default](args = (%index,), kwargs = {dtype: torch.float32})
+     return (_to_copy_3,)
+
+     [BEFORE PASS]
+     input - aten.index - output
+
+     [AFTER PASS]
+     input - aten.permute(NCHW_to_NHWC) - circle_custom.resize_nearest_neighbor - aten.permute(NHWC_to_NCHW) - output
+
+     Since torch 2.8, `torch.nn.functional.interpolate` is converted to the `aten.upsample_nearest2d.vec` op.
+     """
+
+     def __init__(self):
+         super().__init__()
+
+     def convert_index_to_resize_nearest_neighbor(
+         self, exported_program, node
+     ) -> Optional[torch.fx.Node]:
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+
+         args = IndexArgs(*node.args, **node.kwargs)
+         input_tensor = args.input
+         indices = args.indices
+
+         # Only 4-D tensors are supported.
+         if len(indices) != 4:
+             return None
+         # indices = [None, None, H index, W index]
+         N, C, H, W = indices
+         if N != None or C != None:
+             return None
+         if not isinstance(H, torch.fx.Node):
+             return None
+         if not isinstance(W, torch.fx.Node):
+             return None
+         constants_dict = exported_program.constants
+         if (H.name not in constants_dict) or (W.name not in constants_dict):
+             return None
+         H_index, W_index = constants_dict[H.name], constants_dict[W.name]
+         input_tensor_shape = extract_shape(input_tensor)
+         input_tensor_H, input_tensor_W = (
+             input_tensor_shape[2],
+             input_tensor_shape[3],
+         )
+         if H_index.size()[0] % input_tensor_H != 0:
+             return None
+         scale_factor = int(H_index.size()[0] / input_tensor_H)
+         # H and W should be resized with the same ratio.
+         if scale_factor != W_index.size()[0] / input_tensor_W:
+             return None
+         expected_H_index = []
+         expected_W_index = []
+         # See the `_prop_tensor_constant1` constant in the example above.
+         for i in range(input_tensor_H):
+             expected_H_index += [[i]] * scale_factor
+         # See the `_prop_tensor_constant0` constant in the example above.
+         for i in range(input_tensor_W):
+             expected_W_index += [i] * scale_factor
+         if not torch.all(
+             torch.eq(H_index, torch.tensor(expected_H_index))
+         ) or not torch.all(torch.eq(W_index, torch.tensor(expected_W_index))):
+             return None
+         expected_shape = [
+             input_tensor_shape[0],
+             input_tensor_shape[1],
+             len(expected_H_index),
+             len(expected_W_index),
+         ]
+         assert expected_shape == list(extract_shape(node))
+
+         with graph.inserting_before(node):
+             nchw_to_nhwc = graph.call_function(
+                 torch.ops.aten.permute.default, args=(input_tensor, [0, 2, 3, 1])
+             )
+             resize_nearest_neighbor = graph.call_function(
+                 torch.ops.circle_custom.resize_nearest_neighbor,
+                 args=(nchw_to_nhwc, [len(expected_H_index), len(expected_W_index)]),
+             )
+             nhwc_to_nchw = graph.call_function(
+                 torch.ops.aten.permute.default,
+                 args=(resize_nearest_neighbor, [0, 3, 1, 2]),
+             )
+             # Meta is not set manually; `propagate_meta=True` copies the replaced node's meta.
+             node.replace_all_uses_with(nhwc_to_nchw, propagate_meta=True)
+
+         return resize_nearest_neighbor
+
+     def convert_upsample_nearest2d_to_resize_nearest_neighbor(
+         self, exported_program, node
+     ) -> Optional[torch.fx.Node]:
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+
+         args = UpsampleNearest2DVecArgs(*node.args, **node.kwargs)
+         input_tensor = args.input
+         output_size = args.output_size
+         scale_factors = args.scale_factors
+
+         input_tensor_shape = extract_shape(input_tensor)
+         input_tensor_H, input_tensor_W = (
+             input_tensor_shape[2],
+             input_tensor_shape[3],
+         )
+
+         if output_size is not None:
+             raise NotYetSupportedError("output_size is not supported yet")
+
+         if scale_factors is None:
+             raise NotYetSupportedError("scale_factors is None")
+         # TODO Support the output_size case. Currently only the scale_factors case is supported.
+
+         assert (
+             isinstance(scale_factors[0], float)
+             and isinstance(scale_factors[1], float)
+             and scale_factors[0] > 0
+             and scale_factors[1] > 0
+         )
+
+         def close_enough(x, y, epsilon=1e-10):
+             return abs(x - y) < epsilon
+
+         expected_H = int(input_tensor_H * scale_factors[0])
+         if not close_enough(expected_H, input_tensor_H * scale_factors[0]):
+             raise NotYetSupportedError(
+                 f"Cannot support input_tensor_H ({input_tensor_H}) with scaling factor ({scale_factors[0]})"
+             )
+
+         expected_W = int(input_tensor_W * scale_factors[1])
+         if not close_enough(expected_W, input_tensor_W * scale_factors[1]):
+             raise NotYetSupportedError(
+                 f"Cannot support input_tensor_W ({input_tensor_W}) with scaling factor ({scale_factors[1]})"
+             )
+
+         with graph.inserting_before(node):
+             nchw_to_nhwc = graph.call_function(
+                 torch.ops.aten.permute.default, args=(input_tensor, [0, 2, 3, 1])
+             )
+             resize_nearest_neighbor = graph.call_function(
+                 torch.ops.circle_custom.resize_nearest_neighbor,
+                 args=(nchw_to_nhwc, [expected_H, expected_W]),
+             )
+             nhwc_to_nchw = graph.call_function(
+                 torch.ops.aten.permute.default,
+                 args=(resize_nearest_neighbor, [0, 3, 1, 2]),
+             )
+             # Meta is not set manually; `propagate_meta=True` copies the replaced node's meta.
+             node.replace_all_uses_with(nhwc_to_nchw, propagate_meta=True)
+         return resize_nearest_neighbor
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+
+         modified = False
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         for node in graph.nodes:
+             if not node.op == "call_function":
+                 continue
+
+             if node.target not in [
+                 torch.ops.aten.index.Tensor,
+                 torch.ops.aten.upsample_nearest2d.vec,
+             ]:
+                 continue
+
+             resize_nearest_neighbor = None
+             if node.target == torch.ops.aten.index.Tensor:
+                 resize_nearest_neighbor = self.convert_index_to_resize_nearest_neighbor(
+                     exported_program, node
+                 )
+             elif node.target == torch.ops.aten.upsample_nearest2d.vec:
+                 resize_nearest_neighbor = (
+                     self.convert_upsample_nearest2d_to_resize_nearest_neighbor(
+                         exported_program, node
+                     )
+                 )
+
+             if resize_nearest_neighbor:
+                 modified = True
+                 logger.debug(
+                     f"{node.name} is replaced with {resize_nearest_neighbor.name} operator"
+                 )
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
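
The index pattern this pass matches can be checked directly in PyTorch. A standalone sketch (not part of the package) verifying that advanced indexing with the repeated row/column constants from the docstring is exactly a nearest-neighbor upscale:

    import torch

    x = torch.randn(1, 2, 3, 4)
    scale = 2
    # Same layout as _prop_tensor_constant1 / _prop_tensor_constant0 above.
    h_idx = torch.tensor([[i] for i in range(x.shape[2]) for _ in range(scale)])  # [[0], [0], [1], [1], [2], [2]]
    w_idx = torch.tensor([i for i in range(x.shape[3]) for _ in range(scale)])    # [0, 0, 1, 1, 2, 2, 3, 3]

    by_index = x[:, :, h_idx, w_idx]  # what the matched aten.index.Tensor computes
    by_interp = torch.nn.functional.interpolate(x, scale_factor=float(scale), mode="nearest")
    assert torch.equal(by_index, by_interp)
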
tico/passes/lower_to_slice.py ADDED
@@ -0,0 +1,112 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import torch.fx
+ import torch
+ from torch.export import ExportedProgram
+
+ from tico.passes import ops
+
+ from tico.serialize.circle_graph import extract_shape
+ from tico.utils import logging
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_const_diff_on_pass
+ from tico.utils.validate_args_kwargs import SelectCopyIntArgs
+
+
+ @trace_const_diff_on_pass
+ class LowerToSlice(PassBase):
+     """
+     This pass lowers `aten.select.int` / `aten.select_copy.int` to `aten.slice.Tensor`.
+     Only the case where the index is given in args as a constant is supported.
+     Since the index in their args is not a constant tensor, the ops below cannot be converted yet.
+     - torch.ops.aten.index_select.default
+     - torch.ops.aten.embedding.default
+     - torch.ops.aten.index.Tensor
+
+     [before]
+     input (tensor, dim, *index)
+       |
+     select
+       |
+     output
+
+     [after]
+     input (tensor, dim, *index)
+       |
+     slice (input=tensor, dim=dim, start=index, end=index+1, step=1)
+       |
+     reshape (input=slice_copy, size=select_shape)
+       |
+     output
+     """
+
+     def __init__(self):
+         super().__init__()
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+         for node in graph.nodes:
+             if not node.op == "call_function":
+                 continue
+
+             if not node.target in ops.aten.select:
+                 continue
+
+             args = SelectCopyIntArgs(*node.args, **node.kwargs)
+             input = args.input
+             dim = args.dim
+             index = args.index
+
+             input_shape = extract_shape(input)
+             if dim < 0:
+                 dim = dim % len(input_shape)
+
+             start = index
+             end = index + 1
+             step = 1
+             slice_copy_args = (input, dim, start, end, step)
+
+             with graph.inserting_after(node):
+                 # slice
+                 slice_node = graph.call_function(
+                     torch.ops.aten.slice.Tensor, args=slice_copy_args
+                 )
+             node_shape = extract_shape(node)
+             with graph.inserting_after(slice_node):
+                 # reshape
+                 reshape_args = (slice_node, list(node_shape))
+                 reshape_node = graph.call_function(
+                     torch.ops.aten.reshape.default, args=reshape_args
+                 )
+                 node.replace_all_uses_with(reshape_node, propagate_meta=False)
+
+             modified = True
+             logger.debug(
+                 f"{node.name} is replaced with {slice_node.name} and {reshape_node.name} operators"
+             )
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
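
The rewrite performed by LowerToSlice can be reproduced with plain tensor ops. A standalone sketch (not part of the package) showing that select(dim, index) equals a width-one slice followed by a reshape that drops the selected dimension:

    import torch

    x = torch.randn(2, 5, 3)
    dim, index = 1, 4

    selected = torch.select(x, dim, index)                             # shape (2, 3)
    sliced = torch.ops.aten.slice.Tensor(x, dim, index, index + 1, 1)  # shape (2, 1, 3)
    reshaped = sliced.reshape(selected.shape)                          # drop the selected dim
    assert torch.equal(selected, reshaped)
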
tico/passes/merge_consecutive_cat.py ADDED
@@ -0,0 +1,82 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from torch.export import ExportedProgram
+
+ from tico.passes import ops
+ from tico.utils import logging
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
+ from tico.utils.validate_args_kwargs import CatArgs
+
+
+ @trace_graph_diff_on_pass
+ class MergeConsecutiveCat(PassBase):
+     """
+     This pass merges consecutive `aten.cat` operators into a single operator when possible.
+     """
+
+     def __init__(self):
+         super().__init__()
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+         for cat in graph.nodes:
+             if not cat.op == "call_function":
+                 continue
+
+             if not cat.target in ops.aten.cat:
+                 continue
+
+             args = CatArgs(*cat.args, **cat.kwargs)  # type: ignore[arg-type]
+             inputs = args.tensors
+             dim = args.dim
+
+             new_inputs = []
+             for prev_cat in inputs:
+                 new_inputs.append(prev_cat)
+                 if not prev_cat.op == "call_function":
+                     continue
+
+                 if not prev_cat.target in ops.aten.cat:
+                     continue
+
+                 prev_args = CatArgs(*prev_cat.args, **prev_cat.kwargs)  # type: ignore[arg-type]
+                 prev_inputs = prev_args.tensors
+                 prev_dim = prev_args.dim
+
+                 if not prev_dim == dim:
+                     continue
+
+                 new_inputs.pop()
+                 for prev_input in prev_inputs:
+                     new_inputs.append(prev_input)
+
+             if len(new_inputs) > len(inputs):
+                 cat.args = (new_inputs, dim)
+
+                 modified = True
+                 logger.debug(
+                     f"Consecutive cat nodes before {cat.name} are merged into {cat.name}"
+                 )
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
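
The identity MergeConsecutiveCat exploits is easy to check standalone; a minimal sketch (not part of the package): a cat whose input is another cat along the same dim can absorb that cat's inputs directly.

    import torch

    a, b, c = torch.randn(2, 3), torch.randn(2, 3), torch.randn(2, 3)

    nested = torch.cat([torch.cat([a, b], dim=0), c], dim=0)  # cat feeding another cat on the same dim
    merged = torch.cat([a, b, c], dim=0)                      # what the pass rewrites it to
    assert torch.equal(nested, merged)
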
tico/passes/ops.py ADDED
@@ -0,0 +1,75 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+
+
+ """
+ This module contains op lists used for finding target ops in passes.
+ It was introduced to reduce code duplication.
+ It should be guaranteed that ops in the same list have the same input/output signature.
+ """
+
+
+ class AtenOps:
+     def __init__(self):
+         # In alphabetical order
+         self.add = [torch.ops.aten.add.Tensor]
+         self.alias = [torch.ops.aten.alias.default, torch.ops.aten.alias_copy.default]
+         self.cat = [torch.ops.aten.cat.default]
+         self.clamp = [torch.ops.aten.clamp.default, torch.ops.aten.clamp.Tensor]
+         self.clone = [torch.ops.aten.clone.default]
+         self.conv2d = [
+             torch.ops.aten.conv2d.default,
+             torch.ops.aten.conv2d.padding,
+         ]
+         self.conv1d = [
+             torch.ops.aten.conv1d.default,
+             torch.ops.aten.conv1d.padding,
+         ]
+         self.detach = [
+             torch.ops.aten.detach_.default,
+             torch.ops.aten.detach.default,
+         ]
+         self.expand = [
+             torch.ops.aten.expand.default,
+             torch.ops.aten.expand_copy.default,
+         ]
+         self.index_select = [torch.ops.aten.index_select.default]
+         self.mean = [torch.ops.aten.mean.dim]
+         self.mul_scalar = [torch.ops.aten.mul.Scalar]
+         self.mul_tensor = [torch.ops.aten.mul.Tensor]
+         self.permute = [torch.ops.aten.permute.default]
+         self.reshape = [torch.ops.aten.reshape.default]
+         self.select = [torch.ops.aten.select_copy.int, torch.ops.aten.select.int]
+         self.slice = [torch.ops.aten.slice.Tensor, torch.ops.aten.slice_copy.Tensor]
+         self.softmax = [torch.ops.aten._softmax.default]
+         self.squeeze = [torch.ops.aten.squeeze.dims, torch.ops.aten.squeeze_copy.dims]
+         self.to_copy = [
+             torch.ops.aten._to_copy.default,
+             torch.ops.aten.to.dtype,
+             torch.ops.aten.to.dtype_layout,
+         ]
+         self.unsqueeze = [
+             torch.ops.aten.unsqueeze.default,
+             torch.ops.aten.unsqueeze_copy.default,
+         ]
+         self.view = [
+             torch.ops.aten.view,
+             torch.ops.aten.view.default,
+             torch.ops.aten.view_copy.default,
+         ]
+
+
+ aten = AtenOps()
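
A minimal sketch (not part of the package, assuming the wheel is installed) of how passes consume these lists: a single membership test on `node.target` covers every registered overload of an op.

    import torch

    from tico.passes import ops

    print(torch.ops.aten.select.int in ops.aten.select)       # True
    print(torch.ops.aten.select_copy.int in ops.aten.select)  # True
    print(torch.ops.aten.slice.Tensor in ops.aten.slice)      # True
    print(torch.ops.aten.slice.Tensor in ops.aten.select)     # False
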
tico/passes/remove_nop.py ADDED
@@ -0,0 +1,85 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import torch.fx
+ import torch
+ from torch.export import ExportedProgram
+
+ from tico.passes import ops
+ from tico.utils import logging
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
+
+
+ @trace_graph_diff_on_pass
+ class RemoveNop(PassBase):
+     """
+     Remove no-op nodes by replacing their uses with their inputs.
+     """
+
+     target_ops = (
+         [
+             torch.ops.prims.view_of.default,
+         ]
+         + ops.aten.alias
+         + ops.aten.clone
+         + ops.aten.detach
+         + [torch.ops.aten.lift_fresh_copy.default]
+     )
+
+     def __init__(self):
+         super().__init__()
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+         for node in graph.nodes:
+             if not node.op == "call_function":
+                 continue
+
+             if not node.target in RemoveNop.target_ops:
+                 continue
+             # TODO Consider memory format
+             if node.target in ops.aten.clone and "memory_format" in node.kwargs:
+                 if node.kwargs["memory_format"] not in [
+                     torch.preserve_format,
+                     # Converting a non-contiguous layout to contiguous only updates
+                     # the tensor's strides. This is not visible in circle, so we can
+                     # safely ignore this operation.
+                     torch.contiguous_format,
+                 ]:
+                     continue
+
+             assert len(node.args) == 1
+
+             src = node.args[0]
+             assert isinstance(src, torch.fx.Node)
+
+             with graph.inserting_after(node):
+                 node.replace_all_uses_with(src, propagate_meta=False)
+
+             modified = True
+             logger.debug(f"{node.name} is replaced with {src}")
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
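
Why bypassing these nodes is safe can be checked on a small FX graph. A standalone sketch (not part of the package) that reroutes clone/detach users to their single input, the same way RemoveNop does, and confirms the result is unchanged:

    import torch
    import torch.fx

    class M(torch.nn.Module):
        def forward(self, x):
            return x.clone().detach() + 1

    gm = torch.fx.symbolic_trace(M())
    x = torch.randn(2, 3)
    before = gm(x)

    # Bypass the no-op nodes by rerouting their users to their single input.
    for node in gm.graph.nodes:
        if node.op == "call_method" and node.target in ("clone", "detach"):
            node.replace_all_uses_with(node.args[0])
    gm.graph.eliminate_dead_code()
    gm.recompile()

    assert torch.equal(before, gm(x))  # same values without the no-op nodes
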
tico/passes/remove_redundant_assert_nodes.py ADDED
@@ -0,0 +1,50 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import torch
+ from torch.export import ExportedProgram
+
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
+
+
+ assert_node_targets = [
+     torch.ops.aten._assert_tensor_metadata.default,
+ ]
+
+
+ @trace_graph_diff_on_pass
+ class RemoveRedundantAssertionNodes(PassBase):
+     """
+     This pass removes redundant assertion nodes:
+     - `aten._assert_tensor_metadata.default`
+     """
+
+     def __init__(self):
+         super().__init__()
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+         for node in graph.nodes:
+             if node.op == "call_function" and node.target in assert_node_targets:
+                 graph.erase_node(node)
+                 modified = True
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
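
A minimal sketch (not part of the package) of driving this pass by hand on an exported program, assuming the wheel is installed and that the installed torch version emits `_assert_tensor_metadata` nodes for dtype casts; the usual entry point is tico's own conversion pipeline rather than calling passes directly.

    import torch
    from torch.export import export

    from tico.passes.remove_redundant_assert_nodes import RemoveRedundantAssertionNodes

    class Cast(torch.nn.Module):
        def forward(self, x):
            return x.to(torch.float64) + 1.0

    ep = export(Cast(), (torch.randn(2, 2),))
    RemoveRedundantAssertionNodes().call(ep)  # erases any _assert_tensor_metadata nodes
    print(ep.graph_module.graph)              # assertion nodes, if any were present, are gone
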