tico-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. tico/__init__.py +42 -0
  2. tico/config/__init__.py +4 -0
  3. tico/config/base.py +37 -0
  4. tico/config/factory.py +41 -0
  5. tico/config/v1.py +35 -0
  6. tico/experimental/__init__.py +1 -0
  7. tico/experimental/quantization/__init__.py +1 -0
  8. tico/experimental/quantization/algorithm/__init__.py +1 -0
  9. tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
  10. tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
  11. tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
  12. tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
  13. tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
  14. tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
  15. tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
  16. tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
  17. tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
  18. tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
  19. tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
  20. tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
  21. tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
  22. tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
  23. tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
  24. tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
  25. tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
  26. tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
  27. tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
  28. tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
  29. tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
  30. tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
  31. tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
  32. tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
  33. tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
  34. tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
  35. tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
  36. tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
  37. tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
  38. tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
  39. tico/experimental/quantization/config.py +68 -0
  40. tico/experimental/quantization/evaluation/__init__.py +1 -0
  41. tico/experimental/quantization/evaluation/backend.py +20 -0
  42. tico/experimental/quantization/evaluation/evaluate.py +223 -0
  43. tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
  44. tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
  45. tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
  46. tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
  47. tico/experimental/quantization/evaluation/metric.py +109 -0
  48. tico/experimental/quantization/evaluation/utils.py +185 -0
  49. tico/experimental/quantization/passes/__init__.py +1 -0
  50. tico/experimental/quantization/passes/fold_quant_ops.py +154 -0
  51. tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +345 -0
  52. tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
  53. tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
  54. tico/experimental/quantization/passes/quantize_bias.py +123 -0
  55. tico/experimental/quantization/passes/remove_weight_dequant_op.py +177 -0
  56. tico/experimental/quantization/public_interface.py +108 -0
  57. tico/experimental/quantization/quantizer.py +71 -0
  58. tico/interpreter/__init__.py +1 -0
  59. tico/interpreter/infer.py +116 -0
  60. tico/interpreter/interpreter.py +93 -0
  61. tico/passes/__init__.py +1 -0
  62. tico/passes/cast_aten_where_arg_type.py +191 -0
  63. tico/passes/cast_mixed_type_args.py +187 -0
  64. tico/passes/const_prop_pass.py +307 -0
  65. tico/passes/convert_conv1d_to_conv2d.py +160 -0
  66. tico/passes/convert_layout_op_to_reshape.py +85 -0
  67. tico/passes/convert_repeat_to_expand_copy.py +89 -0
  68. tico/passes/convert_to_relu6.py +181 -0
  69. tico/passes/decompose_addmm.py +124 -0
  70. tico/passes/decompose_batch_norm.py +192 -0
  71. tico/passes/decompose_fake_quantize.py +134 -0
  72. tico/passes/decompose_fake_quantize_tensor_qparams.py +294 -0
  73. tico/passes/decompose_group_norm.py +275 -0
  74. tico/passes/decompose_grouped_conv2d.py +209 -0
  75. tico/passes/decompose_slice_scatter.py +169 -0
  76. tico/passes/extract_dtype_kwargs.py +122 -0
  77. tico/passes/fill_meta_val.py +57 -0
  78. tico/passes/fuse_leading_unsqueeze_reshape.py +112 -0
  79. tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
  80. tico/passes/legalize_causal_mask_value.py +108 -0
  81. tico/passes/legalize_predefined_layout_operators.py +386 -0
  82. tico/passes/lower_pow2_to_mul.py +75 -0
  83. tico/passes/lower_to_resize_nearest_neighbor.py +235 -0
  84. tico/passes/lower_to_slice.py +230 -0
  85. tico/passes/merge_consecutive_cat.py +80 -0
  86. tico/passes/ops.py +78 -0
  87. tico/passes/remove_nop.py +84 -0
  88. tico/passes/remove_redundant_assert_nodes.py +51 -0
  89. tico/passes/remove_redundant_expand.py +66 -0
  90. tico/passes/remove_redundant_permute.py +122 -0
  91. tico/passes/remove_redundant_reshape.py +436 -0
  92. tico/passes/remove_redundant_slice.py +62 -0
  93. tico/passes/remove_redundant_to_copy.py +86 -0
  94. tico/passes/restore_linear.py +115 -0
  95. tico/passes/segment_index_select.py +145 -0
  96. tico/pt2_to_circle.py +105 -0
  97. tico/serialize/__init__.py +1 -0
  98. tico/serialize/circle_graph.py +319 -0
  99. tico/serialize/circle_mapping.py +177 -0
  100. tico/serialize/circle_serializer.py +240 -0
  101. tico/serialize/operators/__init__.py +28 -0
  102. tico/serialize/operators/hashable_opcode.py +43 -0
  103. tico/serialize/operators/node_visitor.py +80 -0
  104. tico/serialize/operators/op_abs.py +53 -0
  105. tico/serialize/operators/op_add.py +69 -0
  106. tico/serialize/operators/op_alias_copy.py +64 -0
  107. tico/serialize/operators/op_any.py +150 -0
  108. tico/serialize/operators/op_arange_start_step.py +61 -0
  109. tico/serialize/operators/op_argmax.py +62 -0
  110. tico/serialize/operators/op_avg_pool2d.py +192 -0
  111. tico/serialize/operators/op_bmm.py +62 -0
  112. tico/serialize/operators/op_cat.py +66 -0
  113. tico/serialize/operators/op_clamp.py +126 -0
  114. tico/serialize/operators/op_clone.py +71 -0
  115. tico/serialize/operators/op_constant_pad_nd.py +72 -0
  116. tico/serialize/operators/op_conv2d.py +186 -0
  117. tico/serialize/operators/op_copy.py +164 -0
  118. tico/serialize/operators/op_cos.py +59 -0
  119. tico/serialize/operators/op_cumsum.py +95 -0
  120. tico/serialize/operators/op_depthwise_conv2d.py +199 -0
  121. tico/serialize/operators/op_dequantize_per_channel.py +82 -0
  122. tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
  123. tico/serialize/operators/op_div.py +62 -0
  124. tico/serialize/operators/op_embedding.py +60 -0
  125. tico/serialize/operators/op_eq.py +64 -0
  126. tico/serialize/operators/op_exp.py +60 -0
  127. tico/serialize/operators/op_expand.py +91 -0
  128. tico/serialize/operators/op_full.py +48 -0
  129. tico/serialize/operators/op_full_like.py +55 -0
  130. tico/serialize/operators/op_ge.py +54 -0
  131. tico/serialize/operators/op_gelu.py +59 -0
  132. tico/serialize/operators/op_gt.py +54 -0
  133. tico/serialize/operators/op_index.py +82 -0
  134. tico/serialize/operators/op_index_select.py +64 -0
  135. tico/serialize/operators/op_instance_norm.py +91 -0
  136. tico/serialize/operators/op_leaky_relu.py +60 -0
  137. tico/serialize/operators/op_linear.py +70 -0
  138. tico/serialize/operators/op_log.py +53 -0
  139. tico/serialize/operators/op_log1p.py +86 -0
  140. tico/serialize/operators/op_logical_and.py +63 -0
  141. tico/serialize/operators/op_logical_not.py +62 -0
  142. tico/serialize/operators/op_lt.py +61 -0
  143. tico/serialize/operators/op_max_dim.py +70 -0
  144. tico/serialize/operators/op_max_pool2d_with_indices.py +155 -0
  145. tico/serialize/operators/op_maximum.py +53 -0
  146. tico/serialize/operators/op_mean.py +66 -0
  147. tico/serialize/operators/op_minimum.py +53 -0
  148. tico/serialize/operators/op_mm.py +177 -0
  149. tico/serialize/operators/op_mul.py +99 -0
  150. tico/serialize/operators/op_ne.py +54 -0
  151. tico/serialize/operators/op_neg.py +59 -0
  152. tico/serialize/operators/op_permute.py +65 -0
  153. tico/serialize/operators/op_pow.py +141 -0
  154. tico/serialize/operators/op_prelu.py +54 -0
  155. tico/serialize/operators/op_quantize_per_tensor.py +79 -0
  156. tico/serialize/operators/op_reciprocal.py +64 -0
  157. tico/serialize/operators/op_relu.py +53 -0
  158. tico/serialize/operators/op_relu6.py +52 -0
  159. tico/serialize/operators/op_repeat.py +100 -0
  160. tico/serialize/operators/op_reshape.py +73 -0
  161. tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
  162. tico/serialize/operators/op_rsqrt.py +53 -0
  163. tico/serialize/operators/op_scalar_tensor.py +51 -0
  164. tico/serialize/operators/op_select_copy.py +65 -0
  165. tico/serialize/operators/op_sigmoid.py +56 -0
  166. tico/serialize/operators/op_sin.py +53 -0
  167. tico/serialize/operators/op_slice.py +155 -0
  168. tico/serialize/operators/op_softmax.py +100 -0
  169. tico/serialize/operators/op_split_with_sizes.py +99 -0
  170. tico/serialize/operators/op_sqrt.py +55 -0
  171. tico/serialize/operators/op_squeeze.py +73 -0
  172. tico/serialize/operators/op_sub.py +71 -0
  173. tico/serialize/operators/op_sum.py +63 -0
  174. tico/serialize/operators/op_tanh.py +54 -0
  175. tico/serialize/operators/op_to_copy.py +105 -0
  176. tico/serialize/operators/op_unsqueeze.py +66 -0
  177. tico/serialize/operators/op_view.py +74 -0
  178. tico/serialize/operators/op_where.py +82 -0
  179. tico/serialize/operators/utils.py +94 -0
  180. tico/serialize/pack.py +35 -0
  181. tico/serialize/quant_param.py +42 -0
  182. tico/utils/__init__.py +1 -0
  183. tico/utils/convert.py +296 -0
  184. tico/utils/define.py +35 -0
  185. tico/utils/diff_graph.py +181 -0
  186. tico/utils/errors.py +35 -0
  187. tico/utils/graph.py +282 -0
  188. tico/utils/logging.py +45 -0
  189. tico/utils/model.py +37 -0
  190. tico/utils/mx/__init__.py +1 -0
  191. tico/utils/mx/elemwise_ops.py +267 -0
  192. tico/utils/mx/formats.py +125 -0
  193. tico/utils/mx/mx_ops.py +270 -0
  194. tico/utils/padding.py +47 -0
  195. tico/utils/passes.py +76 -0
  196. tico/utils/register_custom_op.py +609 -0
  197. tico/utils/serialize.py +42 -0
  198. tico/utils/trace_decorators.py +101 -0
  199. tico/utils/utils.py +406 -0
  200. tico/utils/validate_args_kwargs.py +1149 -0
  201. tico-0.1.0.dist-info/LICENSE +241 -0
  202. tico-0.1.0.dist-info/METADATA +354 -0
  203. tico-0.1.0.dist-info/RECORD +206 -0
  204. tico-0.1.0.dist-info/WHEEL +5 -0
  205. tico-0.1.0.dist-info/entry_points.txt +3 -0
  206. tico-0.1.0.dist-info/top_level.txt +1 -0
tico/experimental/quantization/quantizer.py
@@ -0,0 +1,71 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict, Optional
+
+ import torch
+
+ from tico.experimental.quantization.config import BaseConfig
+
+
+ class BaseQuantizer(ABC):
+     """
+     Abstract base class for quantizers that apply a quantization algorithm to a target model.
+     """
+
+     def __init__(self, config: BaseConfig):
+         """
+         Initialize the quantizer with the given configuration.
+
+         Parameters:
+             config (BaseConfig): Quantization configuration parameters.
+         """
+         self.config = config
+
+     @abstractmethod
+     def prepare(
+         self,
+         model: torch.nn.Module,
+         args: Optional[Any] = None,
+         kwargs: Optional[Dict[str, Any]] = None,
+     ):
+         """
+         Prepare the given model for quantization based on the provided algorithm-specific
+         configuration. This involves setting up necessary observers or hooks, and may
+         optionally use example inputs, which is particularly useful for activation quantization.
+
+         Parameters:
+             model: The target PyTorch model.
+             args (Any, optional): Positional example inputs required for activation quantization.
+             kwargs (Dict[str, Any], optional): Keyword example inputs required for activation quantization.
+
+         Returns:
+             The prepared model.
+         """
+         pass
+
+     @abstractmethod
+     def convert(self, model):
+         """
+         Convert the prepared (or calibrated) model into its quantized form. This function leverages
+         the statistics collected during calibration to perform the quantization transformation.
+
+         Parameters:
+             model: The prepared PyTorch model.
+
+         Returns:
+             The quantized model.
+         """
+         pass
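A concrete quantizer only has to implement the two abstract hooks above. As a minimal sketch of the intended subclassing pattern (the `NoOpQuantizer` name and its pass-through behavior are hypothetical, for illustration only; module paths follow the file list above):

```python
import torch

from tico.experimental.quantization.config import BaseConfig
from tico.experimental.quantization.quantizer import BaseQuantizer


class NoOpQuantizer(BaseQuantizer):
    """Hypothetical quantizer whose prepare/convert return the model unchanged."""

    def prepare(self, model: torch.nn.Module, args=None, kwargs=None):
        # A real implementation would attach observers or hooks here,
        # possibly running the model on the example (args, kwargs).
        return model

    def convert(self, model):
        # A real implementation would rewrite the model using the
        # statistics collected between prepare() and convert().
        return model
```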
tico/interpreter/__init__.py
@@ -0,0 +1 @@
+ # DO NOT REMOVE THIS FILE
tico/interpreter/infer.py
@@ -0,0 +1,116 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Any
+
+ import numpy as np
+ import torch
+ from circle_schema import circle
+
+ from tico.interpreter.interpreter import Interpreter
+ from tico.serialize.circle_mapping import np_dtype_from_circle_dtype, to_circle_dtype
+
+
+ def preprocess_inputs(inputs: Any):
+     """
+     Preprocess user inputs for circle inference.
+
+     1. None inputs are ignored.
+     2. A list/tuple input is flattened when a torch module is exported.
+        e.g. inputs = (torch.Tensor, [2,3,4]) -> inputs = (torch.Tensor, 2, 3, 4)
+     """
+     l = []
+     for value in inputs:
+         if value is None:
+             continue
+         if isinstance(value, (tuple, list)):
+             for val in value:
+                 l.append(val)
+         else:
+             l.append(value)
+     # Check if it is a list of a list.
+     if any(isinstance(item, (tuple, list)) for item in l):
+         l = preprocess_inputs(l)
+     return tuple(l)
+
+
+ def infer(circle_binary: bytes, *args: Any, **kwargs: Any) -> Any:
+     # When converting a model, it is assumed that the order of keyword arguments is maintained.
+     user_inputs = args + tuple(kwargs.values())
+     user_inputs = preprocess_inputs(user_inputs)
+     # Cast them to torch.Tensor for simplicity.
+     user_inputs = tuple(
+         torch.tensor(user_input) if not isinstance(user_input, torch.Tensor) else user_input
+         for user_input in user_inputs
+     )
+
+     # Get input spec from circle binary.
+     model = circle.Model.Model.GetRootAsModel(circle_binary, 0)
+     assert model.SubgraphsLength() == 1
+     graph = model.Subgraphs(0)
+     model_input_tensors = [
+         graph.Tensors(graph.Inputs(o)) for o in range(graph.InputsLength())
+     ]
+     model_input_shapes_np = [t.ShapeAsNumpy() for t in model_input_tensors]
+     model_input_types_cm = [t.Type() for t in model_input_tensors]
+
+     # Check that the given inputs' dtypes and shapes match those of the model binary.
+     if len(model_input_shapes_np) != len(user_inputs):
+         raise RuntimeError(
+             f"Mismatch input length: input({len(user_inputs)}) != circle model({len(model_input_shapes_np)})"
+         )
+     for input_idx, user_input in enumerate(user_inputs):
+         # Shape
+         if list(user_input.shape) != list(model_input_shapes_np[input_idx]):
+             raise RuntimeError(
+                 f"Mismatch input {input_idx} shape : input({user_input.shape}) != circle model({model_input_shapes_np[input_idx]})"
+             )
+         # Data type
+         user_input_type_cm = to_circle_dtype(user_input.dtype)
+         if user_input_type_cm != model_input_types_cm[input_idx]:
+             raise RuntimeError(
+                 f"Mismatch input {input_idx} data type : input({user_input_type_cm}) != circle model({model_input_types_cm[input_idx]})"
+             )
+
+     # Initialize interpreter
+     intp = Interpreter(circle_binary)
+
+     # Set input
+     for input_idx, user_input in enumerate(user_inputs):
+         intp.writeInputTensor(input_idx, user_input)
+
+     # Interpret
+     intp.interpret()
+
+     # Retrieve outputs' dtype and shape from circle model
+     model_output_tensors = [
+         graph.Tensors(graph.Outputs(o)) for o in range(graph.OutputsLength())
+     ]
+     model_output_shapes_np = [t.ShapeAsNumpy() for t in model_output_tensors]
+     model_output_types_cm = [t.Type() for t in model_output_tensors]
+
+     output = []
+     # Get output
+     for output_idx in range(len(model_output_tensors)):
+         result: np.ndarray = np.empty(
+             model_output_shapes_np[output_idx],
+             dtype=np_dtype_from_circle_dtype(model_output_types_cm[output_idx]),
+         )
+         intp.readOutputTensor(output_idx, result)
+         output.append(result)
+
+     if len(output) == 1:
+         return output[0]
+     else:
+         return output
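The flattening behavior documented in `preprocess_inputs` is easiest to see on a concrete input; the expected result below follows directly from the code above (the recursion flattens arbitrarily nested lists):

```python
import torch

from tico.interpreter.infer import preprocess_inputs

x = torch.zeros(2, 3)
flat = preprocess_inputs((x, [2, 3, [4, 5]], None))
# None is dropped and the nested lists are flattened:
# flat == (x, 2, 3, 4, 5)
```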
tico/interpreter/interpreter.py
@@ -0,0 +1,93 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from pathlib import Path
+
+ import numpy as np
+ import torch
+ from cffi import FFI
+
+
+ class Interpreter:
+     """
+     Python wrapper for the C++ luci-interpreter class in ONE, using CFFI.
+
+     This class provides a Python interface to the underlying C++ luci-interpreter class in ONE,
+     preserving the original C++ API. Each method corresponds to a method in the C++ class,
+     with additional error handling implemented to ensure that C++ exceptions are captured and
+     translated into Python errors.
+
+     Note that each method includes `check_for_errors` at the end of the body to catch any C++
+     exceptions and translate them into Python exceptions. This ensures that errors in the C++
+     library do not cause undefined behavior in Python.
+     """
+
+     def __init__(self, circle_binary: bytes):
+         self.ffi = FFI()
+         self.ffi.cdef(
+             """
+             typedef struct InterpreterWrapper InterpreterWrapper;
+
+             const char *get_last_error(void);
+             void clear_last_error(void);
+             InterpreterWrapper *Interpreter_new(const uint8_t *data, const size_t data_size);
+             void Interpreter_delete(InterpreterWrapper *intp);
+             void Interpreter_interpret(InterpreterWrapper *intp);
+             void Interpreter_writeInputTensor(InterpreterWrapper *intp, const int input_idx, const void *data, size_t input_size);
+             void Interpreter_readOutputTensor(InterpreterWrapper *intp, const int output_idx, void *output, size_t output_size);
+             """
+         )
+         # TODO Check that the one-compiler version is compatible, i.e. whether it ships the .so file for CFFI.
+         intp_lib_path = Path("/usr/share/one/lib/libcircle_interpreter_cffi.so")
+         if not intp_lib_path.is_file():
+             raise RuntimeError("Please install one-compiler for circle inference.")
+         self.C = self.ffi.dlopen(str(intp_lib_path))
+
+         # Initialize interpreter
+         self.intp = self.C.Interpreter_new(circle_binary, len(circle_binary))
+         self.check_for_errors()
+
+     def delete(self):
+         self.C.Interpreter_delete(self.intp)
+         self.check_for_errors()
+
+     def interpret(self):
+         self.C.Interpreter_interpret(self.intp)
+         self.check_for_errors()
+
+     def writeInputTensor(self, input_idx: int, input_data: torch.Tensor):
+         input_as_numpy = input_data.numpy()
+         # cffi.from_buffer() only accepts a C-contiguous array.
+         input_as_numpy = np.ascontiguousarray(input_as_numpy)
+         c_input = self.ffi.from_buffer(input_as_numpy)
+         self.C.Interpreter_writeInputTensor(
+             self.intp, input_idx, c_input, input_data.nbytes
+         )
+         self.check_for_errors()
+
+     def readOutputTensor(self, output_idx: int, output: np.ndarray):
+         c_output = self.ffi.from_buffer(output)
+         self.C.Interpreter_readOutputTensor(
+             self.intp, output_idx, c_output, output.nbytes
+         )
+         self.check_for_errors()
+
+     def check_for_errors(self):
+         error_message = self.ffi.string(self.C.get_last_error()).decode("utf-8")
+         if error_message:
+             self.C.clear_last_error()
+             raise RuntimeError(f"C++ Exception: {error_message}")
+
+     def __del__(self):
+         self.delete()
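Taken together, the wrapper is driven in the same sequence `infer` uses above. A minimal sketch, assuming one-compiler is installed and `model.circle` is a hypothetical single-input, single-output model:

```python
import numpy as np
import torch

from tico.interpreter.interpreter import Interpreter

with open("model.circle", "rb") as f:  # hypothetical model file
    circle_binary = f.read()

intp = Interpreter(circle_binary)  # raises RuntimeError if the CFFI library is missing
intp.writeInputTensor(0, torch.randn(1, 3, 224, 224))  # must match the model's input spec
intp.interpret()

# The caller allocates the output buffer; its shape/dtype must match the model's output.
out = np.empty((1, 1000), dtype=np.float32)
intp.readOutputTensor(0, out)
```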
tico/passes/__init__.py
@@ -0,0 +1 @@
+ # DO NOT REMOVE THIS FILE
tico/passes/cast_aten_where_arg_type.py
@@ -0,0 +1,191 @@
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import Tuple, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import torch.fx
+ import torch
+ from torch.export import ExportedProgram
+
+ from tico.serialize.circle_mapping import extract_torch_dtype
+ from tico.utils import logging
+ from tico.utils.graph import create_node
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import (
+     trace_const_diff_on_pass,
+     trace_graph_diff_on_pass,
+ )
+ from tico.utils.utils import is_target_node, set_new_meta_val
+ from tico.utils.validate_args_kwargs import WhereSelfArgs
+
+
+ dtype_ranking = {
+     torch.int32: 0,
+     torch.int64: 1,
+     torch.float32: 2,
+ }
+
+
+ def sort_by_dtype(
+     result_true: torch.fx.Node, result_false: torch.fx.Node
+ ) -> Tuple[torch.fx.Node, torch.fx.Node]:
+     true_dtype = extract_torch_dtype(result_true)
+     false_dtype = extract_torch_dtype(result_false)
+     if dtype_ranking[true_dtype] > dtype_ranking[false_dtype]:
+         return result_true, result_false
+     if dtype_ranking[true_dtype] < dtype_ranking[false_dtype]:
+         return result_false, result_true
+     assert False, "The dtype rankings of the two nodes must differ at this point"
+
+
+ def check_if_covered_by_float(tensor: torch.Tensor) -> bool:
+     # About the min/max range, please refer to https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Precision_limitations_on_integer_values
+     if tensor.min() < -(2**24) or tensor.max() > 2**24:
+         return False
+     return True
+
+
+ @trace_graph_diff_on_pass
+ @trace_const_diff_on_pass
+ class CastATenWhereArgType(PassBase):
+     """
+     This pass casts the data type of `aten.where.self` operation's arguments.
+
+     It is applied only when the data types of the arguments, denoted `result_true` and
+     `result_false` in the graphs below, differ; if they are identical, the pass does nothing.
+
+     In addition, this pass casts in the direction that avoids data loss.
+     For example, if the data type of `result_true` is `float32` and the data type of `result_false` is `int32`,
+     then the data type of `result_false` will be cast to `float32`.
+     In that case, the contents of `result_false` must also be within the range of `float32`.
+     If so, `result_false` is cast to `float32`.
+     If not, a RuntimeError is raised.
+
+     After this pass, the arguments of `aten.where.self` have the same data type.
+
+     The graphs before and after this pass are shown below.
+     NOTE The example below shows the case where `result_false` is cast.
+
+     (before)
+
+     [condition]   [result_true]   [result_false]
+          |              |               |
+          |              |               |
+          +--------------+---------------+
+                         |
+                         |
+                      [where]
+                         |
+                         |
+                      [output]
+
+     (after)
+
+                                    [result_false]
+     [condition]   [result_true]          |
+          |              |             [cast]
+          |              |               |
+          +--------------+---------------+
+                         |
+                         |
+                      [where]
+                         |
+                         |
+                      [output]
+     """
+
+     def __init__(self):
+         super().__init__()
+
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+
+         for node in graph.nodes:
+             if not is_target_node(node, torch.ops.aten.where.self):
+                 continue
+
+             where_args = WhereSelfArgs(*node.args, **node.kwargs)  # type: ignore[arg-type]
+             result_true, result_false = where_args.input, where_args.other
+             if not isinstance(result_true, torch.fx.Node) or not isinstance(
+                 result_false, torch.fx.Node
+             ):
+                 continue
+
+             ep = exported_program
+             assert isinstance(result_true, torch.fx.Node)
+             assert isinstance(result_false, torch.fx.Node)
+             if not (
+                 result_true.name in ep.graph_signature.inputs_to_buffers
+                 and result_false.name in ep.graph_signature.inputs_to_buffers
+             ):
+                 continue
+
+             # Check if they have different data types
+             true_dtype = extract_torch_dtype(result_true)
+             false_dtype = extract_torch_dtype(result_false)
+             if true_dtype == false_dtype:
+                 continue
+
+             node_to_dtype = {result_true: true_dtype, result_false: false_dtype}
+
+             not_to_cast, to_cast = sort_by_dtype(result_true, result_false)
+
+             buf_name_to_data = {name: buf for name, buf in ep.named_buffers()}
+             buf_name = ep.graph_signature.inputs_to_buffers[to_cast.name]
+             buf_data = buf_name_to_data[buf_name]
+
+             assert isinstance(buf_data, torch.Tensor)
+
+             dtype_to_cast = node_to_dtype[not_to_cast]
+
+             if dtype_to_cast == torch.float32:
+                 if not check_if_covered_by_float(buf_data):
+                     raise RuntimeError(
+                         f"{to_cast.name}({buf_data.dtype}) data range is out of {dtype_to_cast} range"
+                     )
+             with graph_module.graph.inserting_after(to_cast):
+                 cast = create_node(
+                     graph,
+                     torch.ops.aten._to_copy.default,
+                     args=(to_cast,),
+                     kwargs={"dtype": dtype_to_cast},
+                     origin=to_cast,
+                 )
+                 # Set the new meta["val"] in advance; it is used below to check that type promotion is valid.
+                 set_new_meta_val(cast)
+             node.update_arg(node.args.index(to_cast), cast)
+
+             # Check that type promotion did not change the node's dtype.
+             node_dtype_ori = extract_torch_dtype(node)
+             set_new_meta_val(node)
+             node_dtype = extract_torch_dtype(node)
+             assert (
+                 node_dtype == node_dtype_ori
+             ), "Type casting must not change the node's dtype."
+
+             logger.debug(
+                 f"{to_cast.name}'s dtype was cast from {buf_data.dtype} to {dtype_to_cast}"
+             )
+
+             modified = True
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
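The `2**24` bound in `check_if_covered_by_float` is the point past which float32 can no longer represent every integer exactly, so casting a larger int buffer would silently corrupt values:

```python
import torch

# Every integer up to 2**24 (16777216) is exactly representable in float32;
# 2**24 + 1 is the first that is not.
print(torch.tensor(2**24, dtype=torch.float32).long().item())      # 16777216
print(torch.tensor(2**24 + 1, dtype=torch.float32).long().item())  # 16777216, not 16777217
```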
tico/passes/cast_mixed_type_args.py
@@ -0,0 +1,187 @@
+ # Portions of this file are adapted from code originally authored by
+ # Meta Platforms, Inc. and affiliates, licensed under the BSD-style
+ # license found in the LICENSE file in the root directory of their source tree.
+
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     import torch.fx
+ import torch
+ from torch._prims_common import elementwise_dtypes, ELEMENTWISE_TYPE_PROMOTION_KIND
+ from torch.export import ExportedProgram
+
+ from tico.serialize.circle_mapping import extract_torch_dtype
+ from tico.utils import logging
+ from tico.utils.graph import create_node
+ from tico.utils.passes import PassBase, PassResult
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
+ from tico.utils.utils import is_target_node, set_new_meta_val
+
+
+ ops_to_promote = {
+     torch.ops.aten.add.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.div.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.eq.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.eq.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.ge.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.ge.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.gt.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.gt.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.mul.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.minimum.default: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.ne.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.ne.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.pow.Tensor_Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+     torch.ops.aten.sub.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+ }
+
+
+ def has_same_dtype(lhs, rhs):
+     if isinstance(lhs, torch.fx.Node):
+         lhs_dtype = lhs.meta["val"].dtype
+     elif isinstance(lhs, torch.Tensor):
+         lhs_dtype = lhs.dtype
+     else:
+         lhs_dtype = torch.tensor(lhs).dtype
+     if isinstance(rhs, torch.fx.Node):
+         rhs_dtype = rhs.meta["val"].dtype
+     elif isinstance(rhs, torch.Tensor):
+         rhs_dtype = rhs.dtype
+     else:
+         rhs_dtype = torch.tensor(rhs).dtype
+
+     if lhs_dtype == rhs_dtype:
+         return True
+     return False
+
+
+ def to_numeric_type(torch_dtype: torch.dtype):
+     dmap = {
+         torch.float32: float,
+         torch.float: float,
+         torch.int64: int,
+         torch.bool: bool,
+     }
+
+     if torch_dtype not in dmap:
+         return None
+
+     return dmap[torch_dtype]
+
+
+ @trace_graph_diff_on_pass
+ class CastMixedTypeArgs(PassBase):
+     def __init__(self, preserve_ep_invariant=True):
+         super().__init__()
+         self.preserve_ep_invariant = preserve_ep_invariant
+
+     # TODO Fold float and int values before this pass
+     def call(self, exported_program: ExportedProgram) -> PassResult:
+         logger = logging.getLogger(__name__)
+
+         graph_module = exported_program.graph_module
+         graph = graph_module.graph
+         modified = False
+         for node in graph.nodes:
+             if not is_target_node(node, list(ops_to_promote.keys())):
+                 continue
+
+             assert len(node.args) == 2
+             lhs, rhs = node.args
+             assert isinstance(lhs, (torch.fx.Node, torch.Tensor, float, int)), type(lhs)
+             assert isinstance(rhs, (torch.fx.Node, torch.Tensor, float, int)), type(rhs)
+             if has_same_dtype(lhs, rhs):
+                 continue
+
+             lhs_val = (
+                 lhs.meta["val"] if isinstance(lhs, torch.fx.Node) else torch.tensor(lhs)
+             )
+             rhs_val = (
+                 rhs.meta["val"] if isinstance(rhs, torch.fx.Node) else torch.tensor(rhs)
+             )
+             type_to_promote: torch.dtype = elementwise_dtypes(
+                 lhs_val, rhs_val, type_promotion_kind=ops_to_promote[node.target]
+             )[1]
+             arg_to_promote = None
+             ori_type = None
+             if lhs_val.dtype == type_to_promote:
+                 ori_type = rhs_val.dtype
+                 arg_to_promote = rhs
+             if rhs_val.dtype == type_to_promote:
+                 ori_type = lhs_val.dtype
+                 arg_to_promote = lhs
+             assert arg_to_promote is not None
+
+             if isinstance(arg_to_promote, torch.fx.Node):
+                 with graph.inserting_after(arg_to_promote):
+                     to_copy = create_node(
+                         graph,
+                         torch.ops.aten._to_copy.default,
+                         (arg_to_promote,),
+                         {"dtype": type_to_promote},
+                         origin=arg_to_promote,
+                     )
+                     # Set the new meta["val"] in advance; it is used below to check that type promotion is valid.
+                     set_new_meta_val(to_copy)
+                 node.update_arg(node.args.index(arg_to_promote), to_copy)
+
+                 modified = True
+                 logger.debug(
+                     f"{arg_to_promote.name}'s dtype was cast from {ori_type} to {type_to_promote}"
+                 )
+             else:
+                 index_to_promote = node.args.index(arg_to_promote)
+                 if isinstance(arg_to_promote, torch.Tensor):
+                     arg_to_promote = arg_to_promote.to(type_to_promote)
+                 else:
+                     # numerical types
+                     numeric_type = to_numeric_type(type_to_promote)
+                     if numeric_type is not None:
+                         arg_to_promote = numeric_type(arg_to_promote)
+                     else:
+                         if self.preserve_ep_invariant:
+                             # ExportedProgram (EP) requires adding a placeholder when
+                             # a tensor is created, which complicates the EP structure and
+                             # is not necessary for circle serialization. We skip this case if
+                             # preserve_ep_invariant = True.
+                             continue
+                         else:
+                             # Create the tensor without a placeholder.
+                             # NOTE This breaks the EP invariant.
+                             arg_to_promote = torch.tensor(arg_to_promote).to(
+                                 type_to_promote
+                             )
+                 node.update_arg(index_to_promote, arg_to_promote)
+
+                 modified = True
+                 logger.debug(
+                     f"{arg_to_promote}'s dtype was cast from {ori_type} to {type_to_promote}"
+                 )
+
+             # Check that type promotion did not change the node's dtype.
+             node_dtype_ori = extract_torch_dtype(node)
+             set_new_meta_val(node)
+             node_dtype = extract_torch_dtype(node)
+             assert (
+                 node_dtype == node_dtype_ori
+             ), "Type casting must not change the node's dtype."
+
+         graph.eliminate_dead_code()
+         graph.lint()
+         graph_module.recompile()
+
+         return PassResult(modified)
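The promotion target computed in `call` comes straight from torch's elementwise type-promotion rules, so the lookup can be reproduced standalone with the same `elementwise_dtypes` helper the pass imports:

```python
import torch
from torch._prims_common import ELEMENTWISE_TYPE_PROMOTION_KIND, elementwise_dtypes

lhs = torch.zeros(2, dtype=torch.int32)
rhs = torch.zeros(2, dtype=torch.float32)

# elementwise_dtypes returns (computation dtype, result dtype); the pass takes index [1].
_, result_dtype = elementwise_dtypes(
    lhs, rhs, type_promotion_kind=ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT
)
print(result_dtype)  # torch.float32 -> the int32 argument is the one the pass would cast
```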