tico 0.1.0.dev250411__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tico/__init__.py +31 -0
- tico/config/__init__.py +4 -0
- tico/config/base.py +37 -0
- tico/config/factory.py +41 -0
- tico/config/v1.py +35 -0
- tico/experimental/__init__.py +1 -0
- tico/experimental/quantization/__init__.py +1 -0
- tico/experimental/quantization/algorithm/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
- tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
- tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
- tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
- tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
- tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
- tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
- tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
- tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
- tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
- tico/experimental/quantization/config.py +68 -0
- tico/experimental/quantization/evaluation/__init__.py +1 -0
- tico/experimental/quantization/evaluation/backend.py +20 -0
- tico/experimental/quantization/evaluation/evaluate.py +223 -0
- tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
- tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
- tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
- tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
- tico/experimental/quantization/evaluation/metric.py +109 -0
- tico/experimental/quantization/evaluation/utils.py +185 -0
- tico/experimental/quantization/passes/__init__.py +1 -0
- tico/experimental/quantization/passes/fold_quant_ops.py +97 -0
- tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +289 -0
- tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
- tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
- tico/experimental/quantization/passes/remove_weight_dequant_op.py +168 -0
- tico/experimental/quantization/public_interface.py +108 -0
- tico/experimental/quantization/quantizer.py +71 -0
- tico/interpreter/__init__.py +1 -0
- tico/interpreter/infer.py +116 -0
- tico/interpreter/interpreter.py +93 -0
- tico/passes/__init__.py +1 -0
- tico/passes/cast_aten_where_arg_type.py +185 -0
- tico/passes/cast_mixed_type_args.py +186 -0
- tico/passes/const_prop_pass.py +307 -0
- tico/passes/convert_conv1d_to_conv2d.py +151 -0
- tico/passes/convert_layout_op_to_reshape.py +84 -0
- tico/passes/convert_repeat_to_expand_copy.py +90 -0
- tico/passes/convert_to_relu6.py +180 -0
- tico/passes/decompose_addmm.py +127 -0
- tico/passes/decompose_batch_norm.py +198 -0
- tico/passes/decompose_fake_quantize.py +126 -0
- tico/passes/decompose_fake_quantize_tensor_qparams.py +270 -0
- tico/passes/decompose_group_norm.py +258 -0
- tico/passes/decompose_grouped_conv2d.py +202 -0
- tico/passes/decompose_slice_scatter.py +167 -0
- tico/passes/extract_dtype_kwargs.py +121 -0
- tico/passes/fill_meta_val.py +57 -0
- tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
- tico/passes/legalize_causal_mask_value.py +113 -0
- tico/passes/legalize_predefined_layout_operators.py +383 -0
- tico/passes/lower_pow2_to_mul.py +75 -0
- tico/passes/lower_to_resize_nearest_neighbor.py +249 -0
- tico/passes/lower_to_slice.py +112 -0
- tico/passes/merge_consecutive_cat.py +82 -0
- tico/passes/ops.py +75 -0
- tico/passes/remove_nop.py +85 -0
- tico/passes/remove_redundant_assert_nodes.py +50 -0
- tico/passes/remove_redundant_expand.py +70 -0
- tico/passes/remove_redundant_permute.py +102 -0
- tico/passes/remove_redundant_reshape.py +431 -0
- tico/passes/remove_redundant_slice.py +64 -0
- tico/passes/remove_redundant_to_copy.py +84 -0
- tico/passes/restore_linear.py +113 -0
- tico/passes/segment_index_select.py +143 -0
- tico/pt2_to_circle.py +101 -0
- tico/serialize/__init__.py +1 -0
- tico/serialize/circle_graph.py +264 -0
- tico/serialize/circle_mapping.py +177 -0
- tico/serialize/circle_serializer.py +232 -0
- tico/serialize/operators/__init__.py +28 -0
- tico/serialize/operators/hashable_opcode.py +43 -0
- tico/serialize/operators/node_visitor.py +80 -0
- tico/serialize/operators/op_add.py +69 -0
- tico/serialize/operators/op_alias_copy.py +64 -0
- tico/serialize/operators/op_any.py +142 -0
- tico/serialize/operators/op_arange_start_step.py +61 -0
- tico/serialize/operators/op_argmax.py +62 -0
- tico/serialize/operators/op_avg_pool2d.py +112 -0
- tico/serialize/operators/op_bmm.py +62 -0
- tico/serialize/operators/op_cat.py +66 -0
- tico/serialize/operators/op_clamp.py +123 -0
- tico/serialize/operators/op_clone.py +71 -0
- tico/serialize/operators/op_constant_pad_nd.py +72 -0
- tico/serialize/operators/op_conv2d.py +181 -0
- tico/serialize/operators/op_copy.py +162 -0
- tico/serialize/operators/op_cos.py +59 -0
- tico/serialize/operators/op_cumsum.py +92 -0
- tico/serialize/operators/op_depthwise_conv2d.py +198 -0
- tico/serialize/operators/op_dequantize_per_channel.py +82 -0
- tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
- tico/serialize/operators/op_div.py +62 -0
- tico/serialize/operators/op_embedding.py +60 -0
- tico/serialize/operators/op_eq.py +64 -0
- tico/serialize/operators/op_exp.py +60 -0
- tico/serialize/operators/op_expand.py +91 -0
- tico/serialize/operators/op_full.py +48 -0
- tico/serialize/operators/op_full_like.py +55 -0
- tico/serialize/operators/op_ge.py +54 -0
- tico/serialize/operators/op_gelu.py +59 -0
- tico/serialize/operators/op_gt.py +54 -0
- tico/serialize/operators/op_index.py +82 -0
- tico/serialize/operators/op_index_select.py +64 -0
- tico/serialize/operators/op_instance_norm.py +91 -0
- tico/serialize/operators/op_linear.py +70 -0
- tico/serialize/operators/op_log.py +53 -0
- tico/serialize/operators/op_log1p.py +83 -0
- tico/serialize/operators/op_logical_and.py +63 -0
- tico/serialize/operators/op_logical_not.py +62 -0
- tico/serialize/operators/op_lt.py +61 -0
- tico/serialize/operators/op_max_pool2d_with_indices.py +140 -0
- tico/serialize/operators/op_maximum.py +53 -0
- tico/serialize/operators/op_mean.py +66 -0
- tico/serialize/operators/op_minimum.py +53 -0
- tico/serialize/operators/op_mm.py +174 -0
- tico/serialize/operators/op_mul.py +99 -0
- tico/serialize/operators/op_ne.py +54 -0
- tico/serialize/operators/op_neg.py +59 -0
- tico/serialize/operators/op_permute.py +65 -0
- tico/serialize/operators/op_pow.py +138 -0
- tico/serialize/operators/op_prelu.py +54 -0
- tico/serialize/operators/op_quantize_per_tensor.py +79 -0
- tico/serialize/operators/op_reciprocal.py +64 -0
- tico/serialize/operators/op_relu.py +53 -0
- tico/serialize/operators/op_relu6.py +52 -0
- tico/serialize/operators/op_repeat.py +99 -0
- tico/serialize/operators/op_reshape.py +73 -0
- tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
- tico/serialize/operators/op_rsqrt.py +53 -0
- tico/serialize/operators/op_scalar_tensor.py +51 -0
- tico/serialize/operators/op_select_copy.py +65 -0
- tico/serialize/operators/op_sigmoid.py +56 -0
- tico/serialize/operators/op_sin.py +53 -0
- tico/serialize/operators/op_slice.py +155 -0
- tico/serialize/operators/op_softmax.py +100 -0
- tico/serialize/operators/op_split_with_sizes.py +96 -0
- tico/serialize/operators/op_sqrt.py +55 -0
- tico/serialize/operators/op_squeeze.py +73 -0
- tico/serialize/operators/op_sub.py +71 -0
- tico/serialize/operators/op_sum.py +63 -0
- tico/serialize/operators/op_tanh.py +54 -0
- tico/serialize/operators/op_to_copy.py +105 -0
- tico/serialize/operators/op_unsqueeze.py +66 -0
- tico/serialize/operators/op_view.py +74 -0
- tico/serialize/operators/op_where.py +82 -0
- tico/serialize/operators/utils.py +51 -0
- tico/serialize/pack.py +35 -0
- tico/serialize/quant_param.py +42 -0
- tico/utils/__init__.py +1 -0
- tico/utils/convert.py +292 -0
- tico/utils/define.py +35 -0
- tico/utils/diff_graph.py +181 -0
- tico/utils/errors.py +35 -0
- tico/utils/graph.py +200 -0
- tico/utils/logging.py +45 -0
- tico/utils/model.py +37 -0
- tico/utils/padding.py +47 -0
- tico/utils/passes.py +76 -0
- tico/utils/register_custom_op.py +562 -0
- tico/utils/trace_decorators.py +101 -0
- tico/utils/utils.py +314 -0
- tico/utils/validate_args_kwargs.py +1114 -0
- tico-0.1.0.dev250411.dist-info/LICENSE +241 -0
- tico-0.1.0.dev250411.dist-info/METADATA +17 -0
- tico-0.1.0.dev250411.dist-info/RECORD +196 -0
- tico-0.1.0.dev250411.dist-info/WHEEL +5 -0
- tico-0.1.0.dev250411.dist-info/entry_points.txt +3 -0
- tico-0.1.0.dev250411.dist-info/top_level.txt +1 -0
tico/interpreter/interpreter.py
ADDED

```diff
@@ -0,0 +1,93 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pathlib import Path
+
+import numpy as np
+import torch
+from cffi import FFI
+
+
+class Interpreter:
+    """
+    Python wrapper for the C++ luci-interpreter class in ONE using CFFI.
+
+    This class provides a Python interface to the underlying C++ luci-interpreter class in ONE,
+    preserving the original C++ API. Each method corresponds to a method in the C++ class,
+    with additional error handling implemented to ensure that C++ exceptions are captured and
+    translated into Python errors.
+
+    Note that each method includes `check_for_errors` at the end of the body to catch any C++
+    exceptions and translate them into Python exceptions. This ensures that errors in the C++
+    library do not cause undefined behavior in Python.
+    """
+
+    def __init__(self, circle_binary: bytes):
+        self.ffi = FFI()
+        self.ffi.cdef(
+            """
+            typedef struct InterpreterWrapper InterpreterWrapper;
+
+            const char *get_last_error(void);
+            void clear_last_error(void);
+            InterpreterWrapper *Interpreter_new(const uint8_t *data, const size_t data_size);
+            void Interpreter_delete(InterpreterWrapper *intp);
+            void Interpreter_interpret(InterpreterWrapper *intp);
+            void Interpreter_writeInputTensor(InterpreterWrapper *intp, const int input_idx, const void *data, size_t input_size);
+            void Interpreter_readOutputTensor(InterpreterWrapper *intp, const int output_idx, void *output, size_t output_size);
+            """
+        )
+        # TODO Check whether the installed one-compiler version is compatible, i.e., whether it ships the .so file for CFFI.
+        intp_lib_path = Path("/usr/share/one/lib/libcircle_interpreter_cffi.so")
+        if not intp_lib_path.is_file():
+            raise RuntimeError("Please install one-compiler for circle inference.")
+        self.C = self.ffi.dlopen(str(intp_lib_path))
+
+        # Initialize interpreter
+        self.intp = self.C.Interpreter_new(circle_binary, len(circle_binary))
+        self.check_for_errors()
+
+    def delete(self):
+        self.C.Interpreter_delete(self.intp)
+        self.check_for_errors()
+
+    def interpret(self):
+        self.C.Interpreter_interpret(self.intp)
+        self.check_for_errors()
+
+    def writeInputTensor(self, input_idx: int, input_data: torch.Tensor):
+        input_as_numpy = input_data.numpy()
+        # cffi.from_buffer() only accepts C-contiguous arrays.
+        input_as_numpy = np.ascontiguousarray(input_as_numpy)
+        c_input = self.ffi.from_buffer(input_as_numpy)
+        self.C.Interpreter_writeInputTensor(
+            self.intp, input_idx, c_input, input_data.nbytes
+        )
+        self.check_for_errors()
+
+    def readOutputTensor(self, output_idx: int, output: np.ndarray):
+        c_output = self.ffi.from_buffer(output)
+        self.C.Interpreter_readOutputTensor(
+            self.intp, output_idx, c_output, output.nbytes
+        )
+        self.check_for_errors()
+
+    def check_for_errors(self):
+        error_message = self.ffi.string(self.C.get_last_error()).decode("utf-8")
+        if error_message:
+            self.C.clear_last_error()
+            raise RuntimeError(f"C++ Exception: {error_message}")
+
+    def __del__(self):
+        self.delete()
```
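For orientation, here is a minimal sketch of how this wrapper might be driven. It is an illustration, not part of the package: the `model.circle` path and the 1x4 float32 input/output shape are hypothetical, and the one-compiler `.so` checked for in `__init__` must be installed.

```python
import numpy as np
import torch

from tico.interpreter.interpreter import Interpreter

# Hypothetical Circle model with a single 1x4 float32 input and output.
with open("model.circle", "rb") as f:
    intp = Interpreter(f.read())

# Write input 0, run inference, then read output 0 into a caller-allocated buffer.
intp.writeInputTensor(0, torch.randn(1, 4))
intp.interpret()

output = np.empty((1, 4), dtype=np.float32)  # shape/dtype must match the model
intp.readOutputTensor(0, output)
print(output)
```

Each call raises a Python `RuntimeError` if the underlying C++ call recorded an error, so no extra error handling is needed at the call site.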
tico/passes/__init__.py
ADDED
```diff
@@ -0,0 +1 @@
+# DO NOT REMOVE THIS FILE
```
tico/passes/cast_aten_where_arg_type.py
ADDED

```diff
@@ -0,0 +1,185 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Tuple, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from torch.export import ExportedProgram
+
+from tico.serialize.circle_mapping import extract_torch_dtype
+from tico.utils import logging
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import (
+    trace_const_diff_on_pass,
+    trace_graph_diff_on_pass,
+)
+from tico.utils.utils import set_new_meta_val
+
+
+dtype_ranking = {
+    torch.int32: 0,
+    torch.int64: 1,
+    torch.float32: 2,
+}
+
+
+def sort_by_dtype(
+    result_true: torch.fx.Node, result_false: torch.fx.Node
+) -> Tuple[torch.fx.Node, torch.fx.Node]:
+    true_dtype = extract_torch_dtype(result_true)
+    false_dtype = extract_torch_dtype(result_false)
+    if dtype_ranking[true_dtype] > dtype_ranking[false_dtype]:
+        return result_true, result_false
+    if dtype_ranking[true_dtype] < dtype_ranking[false_dtype]:
+        return result_false, result_true
+    assert False, "There is no case where the dtype_ranking of the nodes is the same"
+
+
+def check_if_covered_by_float(tensor: torch.Tensor) -> bool:
+    # About the min/max range, please refer to https://en.wikipedia.org/wiki/Single-precision_floating-point_format#Precision_limitations_on_integer_values
+    if tensor.min() < -(2**24) or tensor.max() > 2**24:
+        return False
+    return True
+
+
+@trace_graph_diff_on_pass
+@trace_const_diff_on_pass
+class CastATenWhereArgType(PassBase):
+    """
+    This pass casts the data type of one of the `aten.where.self` operation's arguments.
+
+    It is applied when the data types of the operation's arguments differ. If the data types
+    of the arguments, denoted `result_true` and `result_false` in the graph below, are
+    identical, this pass is not applied.
+
+    In addition, this pass casts in the direction that avoids data loss.
+    For example, if the data type of `result_true` is `float32` and the data type of
+    `result_false` is `int32`, then `result_false` will be casted to `float32`.
+    In this case, it is also checked whether the contents of `result_false` are within the
+    range of `float32`. If so, `result_false` is casted to `float32`; if not, a
+    RuntimeError is raised.
+
+    After this pass, the arguments of `aten.where.self` have the same data type.
+
+    The graphs before and after this pass are shown below.
+    NOTE The example below denotes the case where `result_false` was casted.
+
+    (before)
+
+    [condition]    [result_true]    [result_false]
+         |               |                |
+         |               |                |
+         +---------------+----------------+
+                         |
+                         |
+                      [where]
+                         |
+                         |
+                      [output]
+
+    (after)
+
+                                    [result_false]
+    [condition]    [result_true]          |
+         |               |             [cast]
+         |               |                |
+         +---------------+----------------+
+                         |
+                         |
+                      [where]
+                         |
+                         |
+                      [output]
+    """
+
+    def __init__(self):
+        super().__init__()
+
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+
+        for node in graph.nodes:
+            if node.op == "call_function" and node.target == torch.ops.aten.where.self:
+
+                assert len(node.args) == 3
+                (
+                    _,
+                    result_true,
+                    result_false,
+                ) = node.args  # first argument is not used
+
+                ep = exported_program
+
+                if not (
+                    result_true.name in ep.graph_signature.inputs_to_buffers
+                    and result_false.name in ep.graph_signature.inputs_to_buffers
+                ):
+                    continue
+
+                # Check if they have different data types
+                true_dtype = extract_torch_dtype(result_true)
+                false_dtype = extract_torch_dtype(result_false)
+                if true_dtype == false_dtype:
+                    continue
+
+                node_to_dtype = {result_true: true_dtype, result_false: false_dtype}
+
+                not_to_cast, to_cast = sort_by_dtype(result_true, result_false)
+
+                buf_name_to_data = {name: buf for name, buf in ep.named_buffers()}
+                buf_name = ep.graph_signature.inputs_to_buffers[to_cast.name]
+                buf_data = buf_name_to_data[buf_name]
+
+                assert isinstance(buf_data, torch.Tensor)
+
+                dtype_to_cast = node_to_dtype[not_to_cast]
+
+                if dtype_to_cast == torch.float32:
+                    if not check_if_covered_by_float(buf_data):
+                        raise RuntimeError(
+                            f"{to_cast.name}({buf_data.dtype}) data range is out of {dtype_to_cast} range"
+                        )
+                with graph_module.graph.inserting_after(to_cast):
+                    cast = graph_module.graph.call_function(
+                        torch.ops.aten._to_copy.default,
+                        args=(to_cast,),
+                        kwargs={"dtype": dtype_to_cast},
+                    )
+                    # set new meta["val"] in advance because we will use it below for checking if type promotion is valid.
+                    set_new_meta_val(cast)
+                node.update_arg(node.args.index(to_cast), cast)
+
+                # check if type promotion is valid.
+                node_dtype_ori = extract_torch_dtype(node)
+                set_new_meta_val(node)
+                node_dtype = extract_torch_dtype(node)
+                assert (
+                    node_dtype == node_dtype_ori
+                ), "Type casting must not change the node's dtype."
+
+                logger.debug(
+                    f"{to_cast.name}'s dtype was casted from {buf_data.dtype} to {dtype_to_cast}"
+                )
+
+                modified = True
+
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+
+        return PassResult(modified)
```
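To make the pass's contract concrete, here is a sketch of the kind of program it rewrites. The module and tensor values are illustrative, and it assumes export leaves the mixed-dtype `aten.where.self` in the graph (the shape this pass targets); the pass only fires when both value arguments are buffers with differing dtypes.

```python
import torch

from tico.passes.cast_aten_where_arg_type import CastATenWhereArgType


class MixedWhere(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # float32 outranks int32 in dtype_ranking, so result_false is the cast target.
        self.register_buffer("result_true", torch.ones(4, dtype=torch.float32))
        self.register_buffer("result_false", torch.arange(4, dtype=torch.int32))

    def forward(self, cond):
        return torch.where(cond, self.result_true, self.result_false)


ep = torch.export.export(MixedWhere(), (torch.tensor([True, False, True, False]),))

# The pass should insert aten._to_copy(dtype=torch.float32) on result_false:
# its int32 values fit losslessly in float32 (|x| <= 2**24); otherwise it raises.
CastATenWhereArgType().call(ep)
```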
tico/passes/cast_mixed_type_args.py
ADDED

```diff
@@ -0,0 +1,186 @@
+# Portions of this file are adapted from code originally authored by
+# Meta Platforms, Inc. and affiliates, licensed under the BSD-style
+# license found in the LICENSE file in the root directory of their source tree.
+
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import torch.fx
+import torch
+from torch._prims_common import elementwise_dtypes, ELEMENTWISE_TYPE_PROMOTION_KIND
+from torch.export import ExportedProgram
+
+from tico.serialize.circle_mapping import extract_torch_dtype
+from tico.utils import logging
+from tico.utils.passes import PassBase, PassResult
+from tico.utils.trace_decorators import trace_graph_diff_on_pass
+from tico.utils.utils import set_new_meta_val
+
+
+ops_to_promote = {
+    torch.ops.aten.add.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.div.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.eq.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.eq.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.ge.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.ge.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.gt.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.gt.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.mul.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.minimum.default: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.ne.Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.ne.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.pow.Tensor_Scalar: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+    torch.ops.aten.sub.Tensor: ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT,
+}
+
+
+def has_same_dtype(lhs, rhs):
+    if isinstance(lhs, torch.fx.Node):
+        lhs_dtype = lhs.meta["val"].dtype
+    elif isinstance(lhs, torch.Tensor):
+        lhs_dtype = lhs.dtype
+    else:
+        lhs_dtype = torch.tensor(lhs).dtype
+    if isinstance(rhs, torch.fx.Node):
+        rhs_dtype = rhs.meta["val"].dtype
+    elif isinstance(rhs, torch.Tensor):
+        rhs_dtype = rhs.dtype
+    else:
+        rhs_dtype = torch.tensor(rhs).dtype
+
+    if lhs_dtype == rhs_dtype:
+        return True
+    return False
+
+
+def to_numeric_type(torch_dtype: torch.dtype):
+    dmap = {
+        torch.float32: float,
+        torch.float: float,
+        torch.int64: int,
+        torch.bool: bool,
+    }
+
+    if torch_dtype not in dmap:
+        return None
+
+    return dmap[torch_dtype]
+
+
+@trace_graph_diff_on_pass
+class CastMixedTypeArgs(PassBase):
+    def __init__(self, preserve_ep_invariant=True):
+        super().__init__()
+        self.preserve_ep_invariant = preserve_ep_invariant
+
+    # TODO Fold float and int values before this pass
+    def call(self, exported_program: ExportedProgram) -> PassResult:
+        logger = logging.getLogger(__name__)
+
+        graph_module = exported_program.graph_module
+        graph = graph_module.graph
+        modified = False
+        for node in graph.nodes:
+            if node.op != "call_function":
+                continue
+
+            if node.target not in ops_to_promote:
+                continue
+
+            assert len(node.args) == 2
+            lhs, rhs = node.args
+            assert isinstance(lhs, (torch.fx.Node, torch.Tensor, float, int)), type(lhs)
+            assert isinstance(rhs, (torch.fx.Node, torch.Tensor, float, int)), type(rhs)
+            if has_same_dtype(lhs, rhs):
+                continue
+
+            lhs_val = (
+                lhs.meta["val"] if isinstance(lhs, torch.fx.Node) else torch.tensor(lhs)
+            )
+            rhs_val = (
+                rhs.meta["val"] if isinstance(rhs, torch.fx.Node) else torch.tensor(rhs)
+            )
+            type_to_promote: torch.dtype = elementwise_dtypes(
+                lhs_val, rhs_val, type_promotion_kind=ops_to_promote[node.target]
+            )[1]
+            arg_to_promote = None
+            if lhs_val.dtype == type_to_promote:
+                ori_type = rhs_val.dtype
+                arg_to_promote = rhs
+            if rhs_val.dtype == type_to_promote:
+                ori_type = lhs_val.dtype
+                arg_to_promote = lhs
+            assert arg_to_promote is not None
+
+            if isinstance(arg_to_promote, torch.fx.Node):
+                with graph.inserting_after(arg_to_promote):
+                    to_copy = graph.call_function(
+                        torch.ops.aten._to_copy.default,
+                        (arg_to_promote,),
+                        {"dtype": type_to_promote},
+                    )
+                    # set new meta["val"] in advance because we will use it below for checking if type promotion is valid.
+                    set_new_meta_val(to_copy)
+                node.update_arg(node.args.index(arg_to_promote), to_copy)
+
+                modified = True
+                logger.debug(
+                    f"{arg_to_promote.name}'s dtype was casted from {ori_type} to {type_to_promote}"
+                )
+            else:
+                index_to_promote = node.args.index(arg_to_promote)
+                if isinstance(arg_to_promote, torch.Tensor):
+                    arg_to_promote = arg_to_promote.to(type_to_promote)
+                else:
+                    # numerical types
+                    numeric_type = to_numeric_type(type_to_promote)
+                    if numeric_type is not None:
+                        arg_to_promote = numeric_type(arg_to_promote)
+                    else:
+                        if self.preserve_ep_invariant:
+                            # ExportedProgram (EP) requires adding a placeholder when
+                            # a tensor is created, which complicates the EP structure
+                            # but is not necessary for circle serialization. We skip
+                            # this case if preserve_ep_invariant = True.
+                            continue
+                        else:
+                            # Create tensor without placeholder
+                            # NOTE This breaks the EP invariant
+                            arg_to_promote = torch.tensor(arg_to_promote).to(
+                                type_to_promote
+                            )
+                node.update_arg(index_to_promote, arg_to_promote)
+
+                modified = True
+                logger.debug(
+                    f"{arg_to_promote}'s dtype was casted from {ori_type} to {type_to_promote}"
+                )
+
+            # check if type promotion is valid.
+            node_dtype_ori = extract_torch_dtype(node)
+            set_new_meta_val(node)
+            node_dtype = extract_torch_dtype(node)
+            assert (
+                node_dtype == node_dtype_ori
+            ), "Type casting must not change the node's dtype."
+
+        graph.eliminate_dead_code()
+        graph.lint()
+        graph_module.recompile()
+
+        return PassResult(modified)
```
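For reference, the promotion rule this pass leans on comes straight from `torch._prims_common.elementwise_dtypes`, which returns a `(computation_dtype, result_dtype)` pair; the pass takes index `[1]` as the cast target. A minimal sketch:

```python
import torch
from torch._prims_common import ELEMENTWISE_TYPE_PROMOTION_KIND, elementwise_dtypes

lhs = torch.ones(2, dtype=torch.int32)
rhs = torch.ones(2, dtype=torch.float32)

# Index [1] selects the result dtype, exactly as CastMixedTypeArgs does.
promoted = elementwise_dtypes(
    lhs, rhs, type_promotion_kind=ELEMENTWISE_TYPE_PROMOTION_KIND.DEFAULT
)[1]
print(promoted)  # torch.float32 -> the int32 side is the one that gets cast
```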