PyPI - tico - Versions diffs - 0.1.0.dev250717__py3-none-any.whl → 0.1.0.dev250721__py3-none-any.whl - Mend

tico 0.1.0.dev250717__py3-none-any.whl → 0.1.0.dev250721__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

tico/__init__.py CHANGED Viewed

@@ -21,7 +21,7 @@ from tico.config import CompileConfigV1, get_default_config
 from tico.utils.convert import convert, convert_from_exported_program, convert_from_pt2
 # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
-__version__ = "0.1.0.dev250717"
+__version__ = "0.1.0.dev250721"
 MINIMUM_SUPPORTED_VERSION = "2.5.0"
 SECURE_TORCH_VERSION = "2.6.0"

tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py CHANGED Viewed

@@ -18,6 +18,9 @@ if TYPE_CHECKING:
     import torch.fx
 import copy
+from collections import defaultdict
+from typing import Any, Callable
 import torch
 from torch.export import ExportedProgram
@@ -92,6 +95,302 @@ def _u8_to_i16(qparam: QuantParam) -> QuantParam:
     return new_qparam
+def _insert_quantize_op_before(node, inp):
+    graph = node.graph
+    qparam: QuantParam = node.meta[QPARAM_KEY]
+    assert qparam.scale is not None
+    assert qparam.zero_point is not None
+    scale = qparam.scale[0]
+    zerop = qparam.zero_point[0]
+    min_, max_ = quant_min_max(qparam.dtype)
+    dtype = getattr(torch, qparam.dtype)
+    with graph.inserting_before(node):
+        q_args = (inp, scale, zerop, min_, max_, dtype)
+        quantize = create_node(
+            graph,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            args=q_args,
+            origin=node,
+        )
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(qparam)
+        set_new_meta_val(quantize)
+    node.replace_input_with(inp, quantize)
+    return quantize
+def _insert_quantize_op_after(node):
+    graph = node.graph
+    qparam: QuantParam = node.meta[QPARAM_KEY]
+    assert qparam.scale is not None
+    assert qparam.zero_point is not None
+    scale = qparam.scale[0]
+    zerop = qparam.zero_point[0]
+    min_, max_ = quant_min_max(qparam.dtype)
+    dtype = getattr(torch, qparam.dtype)
+    with graph.inserting_after(node):
+        q_args = (node, scale, zerop, min_, max_, dtype)
+        quantize = create_node(
+            graph,
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            args=q_args,
+        )
+    node.replace_all_uses_with(quantize, propagate_meta=True)
+    quantize.replace_input_with(quantize, node)
+    quantize.meta[QPARAM_KEY] = copy.deepcopy(qparam)
+    return quantize
+def _linear_handler(node, logger):
+    lin_args = LinearArgs(*node.args, **node.kwargs)
+    inp = lin_args.input
+    if QPARAM_KEY not in inp.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(inp) == qparam_dtype(node):
+        return
+    if qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        # Update node's qparam from i16 to u8
+        # NOTE This would severely degrade accuracy. It is
+        # important to mitigate this accuracy drop in backend.
+        node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError(
+            f"Unsupported dtype: From {qparam_dtype(inp)} to {qparam_dtype(node)}"
+        )
+def _add_handler(node, logger):
+    add_args = AddTensorArgs(*node.args, **node.kwargs)
+    x = add_args.input
+    y = add_args.other
+    if not isinstance(x, torch.fx.Node):
+        return
+    if not isinstance(y, torch.fx.Node):
+        return
+    if QPARAM_KEY not in x.meta:
+        return
+    if QPARAM_KEY not in y.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(x) == qparam_dtype(node):
+        return
+    if qparam_dtype(x) != qparam_dtype(y):
+        return
+    if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _mul_handler(node, logger):
+    mul_args = MulTensorArgs(*node.args, **node.kwargs)
+    x = mul_args.input
+    y = mul_args.other
+    if not isinstance(x, torch.fx.Node):
+        return
+    if not isinstance(y, torch.fx.Node):
+        return
+    if QPARAM_KEY not in x.meta:
+        return
+    if QPARAM_KEY not in y.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(x) == qparam_dtype(node):
+        return
+    if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _cat_handler(node, logger):
+    cat_args = CatArgs(*node.args, **node.kwargs)
+    tensors = cat_args.tensors
+    if any(QPARAM_KEY not in x.meta for x in tensors):
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    assert len(tensors) > 0
+    in_dtype = qparam_dtype(tensors[0])
+    if in_dtype == qparam_dtype(node):
+        return
+    if any(qparam_dtype(x) != in_dtype for x in tensors):
+        return
+    if in_dtype == "int16" and qparam_dtype(node) == "uint8":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _bmm_handler(node, logger):
+    bmm_args = BmmArgs(*node.args, **node.kwargs)
+    x = bmm_args.input
+    y = bmm_args.mat2
+    if QPARAM_KEY not in x.meta:
+        return
+    if QPARAM_KEY not in y.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(x) == qparam_dtype(node):
+        return
+    if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    elif qparam_dtype(x) == "uint8" and qparam_dtype(node) == "int16":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _permute_handler(node, logger):
+    per_args = PermuteArgs(*node.args, **node.kwargs)
+    inp = per_args.input
+    if QPARAM_KEY not in inp.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(inp) == qparam_dtype(node):
+        return
+    if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
+        # A new Quantize Op (s16 to u8) is inserted before (not after)
+        # permute Op to reduce tensor size ealier
+        quantize = _insert_quantize_op_before(node, inp)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted before {node.name}.")
+    elif qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _reshape_handler(node, logger):
+    reshape_args = ReshapeArgs(*node.args, **node.kwargs)
+    inp = reshape_args.input
+    if QPARAM_KEY not in inp.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(inp) == qparam_dtype(node):
+        return
+    if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
+        # A new Quantize Op (s16 to u8) is inserted before (not after)
+        # reshape Op to reduce tensor size ealier
+        quantize = _insert_quantize_op_before(node, inp)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted before {node.name}.")
+    elif qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+def _relu_handler(node, logger):
+    relu_args = ReluArgs(*node.args, **node.kwargs)
+    inp = relu_args.input
+    if QPARAM_KEY not in inp.meta:
+        return
+    if QPARAM_KEY not in node.meta:
+        return
+    if qparam_dtype(inp) == qparam_dtype(node):
+        return
+    if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
+        quantize = _insert_quantize_op_after(node)
+        quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
+        node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
+        logger.debug(f"quantize_per_tensor.default is inserted after {node.name}.")
+    else:
+        raise NotYetSupportedError("Unsupported dtype")
+_op_handler: defaultdict[Any, Any | None] = defaultdict(lambda: None)
+_op_handler[torch.ops.aten.linear.default] = _linear_handler
+_op_handler[torch.ops.aten.add.Tensor] = _add_handler
+_op_handler[torch.ops.aten.mul.Tensor] = _mul_handler
+_op_handler[torch.ops.aten.cat.default] = _cat_handler
+_op_handler[torch.ops.aten.bmm.default] = _bmm_handler
+_op_handler[torch.ops.aten.permute.default] = _permute_handler
+_op_handler[torch.ops.aten.reshape.default] = _reshape_handler
+_op_handler[torch.ops.aten.relu.default] = _relu_handler
 @trace_graph_diff_on_pass
 class InsertQuantizeOnDtypeMismatch(PassBase):
     """
@@ -138,296 +437,13 @@ class InsertQuantizeOnDtypeMismatch(PassBase):
         graph_module = exported_program.graph_module
         graph: torch.fx.Graph = graph_module.graph
-        def _insert_quantize_op_before(node, inp):
-            qparam: QuantParam = node.meta[QPARAM_KEY]
-            assert qparam.scale is not None
-            assert qparam.zero_point is not None
-            scale = qparam.scale[0]
-            zerop = qparam.zero_point[0]
-            min_, max_ = quant_min_max(qparam.dtype)
-            dtype = getattr(torch, qparam.dtype)
-            with graph.inserting_before(node):
-                q_args = (inp, scale, zerop, min_, max_, dtype)
-                quantize = create_node(
-                    graph,
-                    torch.ops.quantized_decomposed.quantize_per_tensor.default,
-                    args=q_args,
-                    origin=node,
-                )
-                quantize.meta[QPARAM_KEY] = copy.deepcopy(qparam)
-                set_new_meta_val(quantize)
-            node.replace_input_with(inp, quantize)
-            return quantize
-        def _insert_quantize_op_after(node):
-            qparam: QuantParam = node.meta[QPARAM_KEY]
-            assert qparam.scale is not None
-            assert qparam.zero_point is not None
-            scale = qparam.scale[0]
-            zerop = qparam.zero_point[0]
-            min_, max_ = quant_min_max(qparam.dtype)
-            dtype = getattr(torch, qparam.dtype)
-            with graph.inserting_after(node):
-                q_args = (node, scale, zerop, min_, max_, dtype)
-                quantize = create_node(
-                    graph,
-                    torch.ops.quantized_decomposed.quantize_per_tensor.default,
-                    args=q_args,
-                )
-            node.replace_all_uses_with(quantize, propagate_meta=True)
-            quantize.replace_input_with(quantize, node)
-            quantize.meta[QPARAM_KEY] = copy.deepcopy(qparam)
-            return quantize
         for node in graph.nodes:
             if node.op != "call_function":
                 continue
-            if node.target == torch.ops.aten.linear.default:
-                lin_args = LinearArgs(*node.args, **node.kwargs)
-                inp = lin_args.input
-                if QPARAM_KEY not in inp.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(inp) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    # Update node's qparam from i16 to u8
-                    # NOTE This would severely degrade accuracy. It is
-                    # important to mitigate this accuracy drop in backend.
-                    node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError(
-                        f"Unsupported dtype: From {qparam_dtype(inp)} to {qparam_dtype(node)}"
-                    )
-            elif node.target == torch.ops.aten.add.Tensor:
-                add_args = AddTensorArgs(*node.args, **node.kwargs)
-                x = add_args.input
-                y = add_args.other
-                if not isinstance(x, torch.fx.Node):
-                    continue
-                if not isinstance(y, torch.fx.Node):
-                    continue
-                if QPARAM_KEY not in x.meta:
-                    continue
-                if QPARAM_KEY not in y.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(x) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(x) != qparam_dtype(y):
-                    continue
-                if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.mul.Tensor:
-                mul_args = MulTensorArgs(*node.args, **node.kwargs)
-                x = mul_args.input
-                y = mul_args.other
-                if not isinstance(x, torch.fx.Node):
-                    continue
-                if not isinstance(y, torch.fx.Node):
-                    continue
-                if QPARAM_KEY not in x.meta:
-                    continue
-                if QPARAM_KEY not in y.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(x) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.cat.default:
-                cat_args = CatArgs(*node.args, **node.kwargs)
-                tensors = cat_args.tensors
-                if any(QPARAM_KEY not in x.meta for x in tensors):
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                assert len(tensors) > 0
-                in_dtype = qparam_dtype(tensors[0])
-                if in_dtype == qparam_dtype(node):
-                    continue
-                if any(qparam_dtype(x) != in_dtype for x in tensors):
-                    continue
-                if in_dtype == "int16" and qparam_dtype(node) == "uint8":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.bmm.default:
-                bmm_args = BmmArgs(*node.args, **node.kwargs)
-                x = bmm_args.input
-                y = bmm_args.mat2
-                if QPARAM_KEY not in x.meta:
-                    continue
-                if QPARAM_KEY not in y.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(x) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(x) == "int16" and qparam_dtype(node) == "uint8":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.permute.default:
-                per_args = PermuteArgs(*node.args, **node.kwargs)
-                inp = per_args.input
-                if QPARAM_KEY not in inp.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(inp) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
-                    # A new Quantize Op (s16 to u8) is inserted before (not after)
-                    # permute Op to reduce tensor size ealier
-                    quantize = _insert_quantize_op_before(node, inp)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted before {node.name}."
-                    )
-                elif qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.reshape.default:
-                reshape_args = ReshapeArgs(*node.args, **node.kwargs)
-                inp = reshape_args.input
-                if QPARAM_KEY not in inp.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(inp) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
-                    # A new Quantize Op (s16 to u8) is inserted before (not after)
-                    # reshape Op to reduce tensor size ealier
-                    quantize = _insert_quantize_op_before(node, inp)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted before {node.name}."
-                    )
-                elif qparam_dtype(inp) == "uint8" and qparam_dtype(node) == "int16":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _i16_to_u8(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            elif node.target == torch.ops.aten.relu.default:
-                relu_args = ReluArgs(*node.args, **node.kwargs)
-                inp = relu_args.input
-                if QPARAM_KEY not in inp.meta:
-                    continue
-                if QPARAM_KEY not in node.meta:
-                    continue
-                if qparam_dtype(inp) == qparam_dtype(node):
-                    continue
-                if qparam_dtype(inp) == "int16" and qparam_dtype(node) == "uint8":
-                    quantize = _insert_quantize_op_after(node)
-                    quantize.meta[QPARAM_KEY] = copy.deepcopy(node.meta[QPARAM_KEY])
-                    node.meta[QPARAM_KEY] = _u8_to_i16(node.meta[QPARAM_KEY])
-                    logger.debug(
-                        f"quantize_per_tensor.default is inserted after {node.name}."
-                    )
-                else:
-                    raise NotYetSupportedError("Unsupported dtype")
-            # TODO Support more ops.
+            handler = _op_handler[node.target]
+            if handler is not None:
+                handler(node, logger)
         graph.eliminate_dead_code()
         graph.lint()

tico/serialize/circle_graph.py CHANGED Viewed

@@ -152,6 +152,15 @@ class CircleSubgraph(circle.SubGraph.SubGraphT):
         assert node.meta.get("val") is not None
         tensor.type = extract_circle_dtype(node)
         tensor.shape = list(extract_shape(node))
+        # Handle dynamic shape
+        if any(isinstance(s, torch.SymInt) for s in tensor.shape):
+            tensor.shapeSignature = tensor.shape.copy()
+            for idx, s in enumerate(tensor.shape):
+                if isinstance(s, torch.SymInt):
+                    tensor.shape[idx] = 1
+                    tensor.shapeSignature[idx] = -1
         if QPARAM_KEY in node.meta:
             tensor.quantization = to_circle_qparam(node.meta[QPARAM_KEY])
             tensor.type = str_to_circle_dtype(node.meta[QPARAM_KEY].dtype)
@@ -241,6 +250,15 @@ class CircleSubgraph(circle.SubGraph.SubGraphT):
         if source_node is not None:
             self.name_to_node[tensor.name] = source_node
         tensor.shape = shape
+        # Handle dynamic shape
+        if any(isinstance(s, torch.SymInt) for s in tensor.shape):
+            tensor.shapeSignature = tensor.shape.copy()
+            for idx, s in enumerate(tensor.shape):
+                if isinstance(s, torch.SymInt):
+                    tensor.shape[idx] = 1
+                    tensor.shapeSignature[idx] = -1
         if qparam is not None:
             tensor.quantization = to_circle_qparam(qparam)
             tensor.type = str_to_circle_dtype(qparam.dtype)

tico/serialize/operators/op_softmax.py CHANGED Viewed

@@ -24,25 +24,18 @@ from tico.serialize.operators.hashable_opcode import OpCode
 from tico.serialize.operators.node_visitor import NodeVisitor, register_node_visitor
 from tico.serialize.operators.utils import create_builtin_operator, get_op_index
 from tico.utils.errors import NotYetSupportedError
-from tico.utils.utils import HAS_TORCH_OVER_25
 from tico.utils.validate_args_kwargs import SafeSoftmaxArgs, SoftmaxArgs
 @register_node_visitor
 class SoftMaxVisitor(NodeVisitor):
-    target: List[torch._ops.OpOverload] = (
-        [
-            torch.ops.aten._softmax.default,
-            # NOTE: Let's treat _safe_softmax as normal _softmax as its usage is for training.
-            # In order for optimization during inference, it can be replaced to softmax.
-            # ref: https://github.com/pytorch/pytorch/pull/133882
-            torch.ops.aten._safe_softmax.default,
-        ]
-        if HAS_TORCH_OVER_25
-        else [
-            torch.ops.aten._softmax.default,
-        ]
-    )
+    target: List[torch._ops.OpOverload] = [
+        torch.ops.aten._softmax.default,
+        # NOTE: Let's treat _safe_softmax as normal _softmax as its usage is for training.
+        # In order for optimization during inference, it can be replaced to softmax.
+        # ref: https://github.com/pytorch/pytorch/pull/133882
+        torch.ops.aten._safe_softmax.default,
+    ]
     def __init__(self, op_codes: Dict[OpCode, int], graph):
         super().__init__(op_codes, graph)

tico/utils/convert.py CHANGED Viewed

@@ -106,6 +106,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
             torch.ops.aten._safe_softmax.default,
             torch.ops.aten.relu6.default,  # Do not decompose to hardtanh
             torch.ops.aten.linear.default,
+            torch.ops.aten.upsample_nearest2d.vec,
         )
         ep = ep.run_decompositions(_preserve_ops=_preserve_ops)
@@ -124,6 +125,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
             torch.ops.aten.relu6.default,  # Do not decompose to hardtanh
             torch.ops.aten.prelu.default,
             torch.ops.aten.linear.default,
+            torch.ops.aten.upsample_nearest2d.vec,
         )
         for op in _preserve_ops:
             if op in _decomp_table:
@@ -138,6 +140,7 @@ def traced_run_decompositions(exported_program: ExportedProgram):
         torch.__version__.startswith("2.6")
         or torch.__version__.startswith("2.7")
         or torch.__version__.startswith("2.8")
+        or torch.__version__.startswith("2.9")
     ):
         return run_decompositions(exported_program)
     else:
@@ -293,6 +296,7 @@ def convert(
     mod: torch.nn.Module,
     args: Tuple[Any, ...],
     kwargs: Optional[Dict[str, Any]] = None,
+    dynamic_shapes: Optional[dict] = None,
     strict: bool = True,
     config: CompileConfigBase = get_default_config(),
 ) -> CircleModel:
@@ -303,7 +307,9 @@ def convert(
         )
     with torch.no_grad():
-        exported_program = export(mod, args, kwargs, strict=strict)
+        exported_program = export(
+            mod, args, kwargs, dynamic_shapes=dynamic_shapes, strict=strict
+        )
     circle_binary = convert_exported_module_to_circle(exported_program, config=config)

tico/utils/dtype.py ADDED Viewed

@@ -0,0 +1,20 @@
+import numpy as np
+import torch
+NUMPY_TO_TORCH_DTYPE_DICT = {
+    np.dtype("float32"): torch.float32,
+    np.dtype("float64"): torch.float64,
+    np.dtype("float16"): torch.float16,
+    np.dtype("complex64"): torch.complex64,
+    np.dtype("complex128"): torch.complex128,
+    np.dtype("int64"): torch.int64,
+    np.dtype("int32"): torch.int32,
+    np.dtype("int16"): torch.int16,
+    np.dtype("int8"): torch.int8,
+    np.dtype("uint8"): torch.uint8,
+    np.dtype("bool"): torch.bool,
+}
+def numpy_dtype_to_torch_dtype(np_dtype: np.dtype) -> torch.dtype:
+    return NUMPY_TO_TORCH_DTYPE_DICT[np_dtype]

tico/utils/torch_compat.py ADDED Viewed

@@ -0,0 +1,52 @@
+# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Runtime **capability-detection helpers** for the `torch.export` stack.
+Instead of sprinkling version checks like `torch.__version__ >= "2.9"` throughout
+the codebase, import these helpers once and branch on the feature you need.
+Each probe executes only **once per process** thanks to `functools.lru_cache`,
+so the overhead is negligible.
+"""
+import functools
+import torch
+@functools.lru_cache(maxsize=None)
+def export_produces_slice() -> bool:
+    """
+    Compile a minimal model with `torch.export.export` and inspect its FX graph
+    to see whether an `aten.slice.Tensor` node appears.
+    Returns
+    -------
+    bool
+        * ``True``  — downstream passes should expect redundant **slice** nodes.
+        * ``False`` — downstream passes should expect only a **select** node.
+    """
+    class _Probe(torch.nn.Module):
+        def forward(self, x):  # simple slice: keep all dims except 3rd
+            return x[:, :, 1]
+        def get_example_inputs(self):
+            return (torch.randn(1, 4, 4),)
+    m = _Probe()
+    ep = torch.export.export(m, m.get_example_inputs())
+    return any(n.target == torch.ops.aten.slice.Tensor for n in ep.graph.nodes)

tico/utils/utils.py CHANGED Viewed

@@ -29,10 +29,6 @@ from torch.utils import _pytree as pytree
 from tico.serialize.quant_param import QuantParam
-HAS_TORCH_OVER_25 = Version(torch.__version__) >= Version("2.5.0")
-HAS_TORCH_OVER_28_DEV = Version(torch.__version__) >= Version("2.8.0.dev")
 def get_fake_mode(exported_program: ExportedProgram):
     fake_mode = detect_fake_mode(
         tuple(

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tico
-Version: 0.1.0.dev250717
+Version: 0.1.0.dev250721
 Summary: Convert exported Torch module to circle
 Home-page: UNKNOWN
 License: UNKNOWN

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-tico/__init__.py,sha256=8WsnAhznDCSGOK_vrdZdi2apsz1wqJDKSfCjR4LTG8c,1743
+tico/__init__.py,sha256=QY54qph93oHLtUUYMg8T_e_Bpn4Zmxfif9POfAGssHI,1743
 tico/pt2_to_circle.py,sha256=gu3MD4Iqc0zMZcCZ2IT8oGbyj21CTSbT3Rgd9s2B_9A,2767
 tico/config/__init__.py,sha256=xZzCXjZ84qE-CsBi-dfaL05bqpQ3stKKfTXhnrJRyVs,142
 tico/config/base.py,sha256=anwOiJFkUxUi7Cef573JgQcjk6S-FSi6O_TLjYASW-g,1244
@@ -51,7 +51,7 @@ tico/experimental/quantization/evaluation/executor/circle_executor.py,sha256=eCC
 tico/experimental/quantization/evaluation/executor/triv24_executor.py,sha256=sUoXl6oOO2arAKaNjOBg7HiQja145_Jv6qgY7XtR7A8,5159
 tico/experimental/quantization/passes/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/experimental/quantization/passes/fold_quant_ops.py,sha256=iaBMyO49CwVkhebMz3rjkHWfWE2LhwH6fORe7n4S6XQ,7040
-tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py,sha256=FfqTlGANcG1V64zw0MFcIxL9WafuxPINuzWohGdsYCg,16617
+tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py,sha256=t3bnNY9Abm8CZfSWzsbvx59luylXVxqmUvCKPBVPAIE,14731
 tico/experimental/quantization/passes/propagate_qparam_backward.py,sha256=TGtyW0Z2qOTgVIasBdGRgbwH31YYd6ek7OvLTmCV614,3118
 tico/experimental/quantization/passes/propagate_qparam_forward.py,sha256=RhUHGCR2RpBO5KYkQ7Z8U5u7HEwDq2wdKHLKAJCi-5c,5138
 tico/experimental/quantization/passes/quantize_bias.py,sha256=ZQ3rETYStpW28JUbODRixbq5sDEOiIOB_qWA-Jzuu-Y,4337
@@ -96,7 +96,7 @@ tico/passes/remove_redundant_to_copy.py,sha256=tKy4XKkO2l33fMxVPQ_iFkUeFvP15kbPv
 tico/passes/restore_linear.py,sha256=xGJdNb-1CrkOKS9BnLbcblkZc6P2vVjKIi-7lRcs7Bk,4111
 tico/passes/segment_index_select.py,sha256=jn0M2sdUcDyjrvxfvM40wt5644iPQMY_ud0uvptXN84,5187
 tico/serialize/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
-tico/serialize/circle_graph.py,sha256=_u0vFDhPdOhEkucmaEhqILo13NKbjyVemPYFfC5YCZg,11619
+tico/serialize/circle_graph.py,sha256=3t78g5eKzhHKvIBJqQ-CcwbqoV-2QwAdd_8wm4W1yXw,12317
 tico/serialize/circle_mapping.py,sha256=C9C3ORACQOdvBdnt5KRzlT8zao_TvzQklIxH794OhP0,5719
 tico/serialize/circle_serializer.py,sha256=KRx_Azx2Je9XNYe-pZuuiSMvbXEddd8M8qDATIt7XXk,8981
 tico/serialize/pack.py,sha256=5HZ9kX3x6C6CyT_FWS6FRmvx_P7Dx21orjUNQxJ2xlo,1297
@@ -169,7 +169,7 @@ tico/serialize/operators/op_select_copy.py,sha256=GPLN7QZmwSlA4WRbjfU6pLer3KVWzg
 tico/serialize/operators/op_sigmoid.py,sha256=ZubbGG1yU5uvNkEmOmbjj3eq7d9mwEaJdChRgL0OjDU,2045
 tico/serialize/operators/op_sin.py,sha256=MbttmHTVKhwKK6gH9Vbcbn5aAaxnQ71NdpmQAlTcojU,1827
 tico/serialize/operators/op_slice.py,sha256=g0r8lj5CIxpT6ixOKqUzwKiNhoiuIFwWjbpaiCoOg6w,5259
-tico/serialize/operators/op_softmax.py,sha256=8AwmsAVdSoIMKdfejrw9cy44TbOvvXsA0w3WQDVpI3A,3855
+tico/serialize/operators/op_softmax.py,sha256=qwYke5zfhnSL89DZbzdr5Fc9SsJf0vI-LDZjT_NFpbc,3669
 tico/serialize/operators/op_split_with_sizes.py,sha256=TgYg1cu-3BSz9SsXfAhoJbo4q5ZzFaoFArkH_obsYlU,4274
 tico/serialize/operators/op_sqrt.py,sha256=9Q5jkuEPrim11XfSQHGDGVTMYk1TQhOfVqMVYD_eIrI,1871
 tico/serialize/operators/op_squeeze.py,sha256=QnNwfAdTC1xBm04C9DkVs8VB5YRN-4fCsIWn189QaPg,2416
@@ -183,9 +183,10 @@ tico/serialize/operators/op_view.py,sha256=5EMww-ve17Vm9XPuV03Tn7vJsjpU2J8U4d_FO
 tico/serialize/operators/op_where.py,sha256=doE81GSwygrPBm3JIfN9w7kKXxeIYKxgk0eoY22QIcg,2845
 tico/serialize/operators/utils.py,sha256=lXGpEJW1h8U_-gfc6EWjvvSiq3yJ9P-v1v3EMRT_pSk,2954
 tico/utils/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
-tico/utils/convert.py,sha256=w4l7fnqbiVACOU5-OXr8Ebyl4EMeeBz6vwUSuOS_CtI,12977
+tico/utils/convert.py,sha256=lNbbNswbKyCTK5E5i5CXkJBykWWfF5HDChhM3DLscWo,13222
 tico/utils/define.py,sha256=Ypgp7YffM4pgPl4Zh6TmogSn1OxGBMRw_e09qYGflZk,1467
 tico/utils/diff_graph.py,sha256=_eDGGPDPYQD4b--MXX0DLoVgSt_wLfNPt47UlolLLR4,5272
+tico/utils/dtype.py,sha256=4-k1iUaHivFFXAQuDs7up6fXt5y4FqldGNokAPa3kic,603
 tico/utils/errors.py,sha256=f3csJjgbXG9W1aHhqEcou008Aor19W57X8oT5Hx8w1M,954
 tico/utils/graph.py,sha256=Y6aODsnc_-9l61oanknb7K1jqJ8B35iPypOKkM0Qkk0,9149
 tico/utils/installed_packages.py,sha256=J0FTwnkCGs0MxRWoCMYAqiwH7Z0GWFDLV--x-IndSp4,1017
@@ -196,16 +197,17 @@ tico/utils/passes.py,sha256=kGmDe__5cPaO6i5EDAoXSVe6yXEoX9hAny4ROb3ZEmQ,2409
 tico/utils/pytree_utils.py,sha256=jrk3N6X6LiUnBCX_gM1K9nywbVAJBVnszlTAgeIeDUc,5219
 tico/utils/register_custom_op.py,sha256=3-Yl6iYmx1qQA2igNHt4hYhQhQMkdPb7gF50LIY8yvc,27350
 tico/utils/serialize.py,sha256=AQXMBOLu-Kg2Rn-qbqsAtHndjZAZIavlKA0QFgJREHM,1420
+tico/utils/torch_compat.py,sha256=oc6PztVsXdHcQ3iaVR90wLLxrGaj6zFHWZ8K9rRS6q8,1795
 tico/utils/trace_decorators.py,sha256=ddLIiKQfSaQrxgF1kNpwjFTQnXENzeSfcr1kuAW4jGI,3221
-tico/utils/utils.py,sha256=fnbZ2RLH6-J-wqb32O4qsR1ce4BJU0wYNrk84QXa6_E,13158
+tico/utils/utils.py,sha256=kg4in3E0eH2nURpnt8XRPUS2Iam3HjJRadDZdyhUy0w,13014
 tico/utils/validate_args_kwargs.py,sha256=3dXkNll9E9eZq-p0HjYaV4YltQESqdEHBU34k-tIg1k,26733
 tico/utils/mx/__init__.py,sha256=IO6FP_xYbGy0dW0HL26GXD3ouxARaxCK7bz9dn4blPQ,26
 tico/utils/mx/elemwise_ops.py,sha256=V6glyAHsVR1joqpsgnNytatCD_ew92xNWZ19UFDoMTA,10281
 tico/utils/mx/formats.py,sha256=uzNWyu-1onUlwQfX5cZ6fZSUfHMRqorper7_T1k3jfk,3404
 tico/utils/mx/mx_ops.py,sha256=RcfUTYVi-wilGB2sC35OeARdwDqnixv7dG5iyZ-fQT8,8555
-tico-0.1.0.dev250717.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
-tico-0.1.0.dev250717.dist-info/METADATA,sha256=AxsK-qqfRS2Cd0fJ8ChPI-pVZHvX5Kt1XeB7SMkdyKc,8430
-tico-0.1.0.dev250717.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-tico-0.1.0.dev250717.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
-tico-0.1.0.dev250717.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
-tico-0.1.0.dev250717.dist-info/RECORD,,
+tico-0.1.0.dev250721.dist-info/LICENSE,sha256=kp4JLII7bzRhPb0CPD5XTDZMh22BQ7h3k3B7t8TiSbw,12644
+tico-0.1.0.dev250721.dist-info/METADATA,sha256=OwOUs1qxfuulEmUn99ocFFSGB9L0XmUmCqNhZugmsCU,8430
+tico-0.1.0.dev250721.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+tico-0.1.0.dev250721.dist-info/entry_points.txt,sha256=kBKYSS_IYrSXmUYevmmepqIVPScq5vF8ulQRu3I_Zf0,59
+tico-0.1.0.dev250721.dist-info/top_level.txt,sha256=oqs7UPoNSKZEwqsX8B-KAWdQwfAa7i60pbxW_Jk7P3w,5
+tico-0.1.0.dev250721.dist-info/RECORD,,

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/LICENSE RENAMED Viewed

File without changes

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/WHEEL RENAMED Viewed

File without changes

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{tico-0.1.0.dev250717.dist-info → tico-0.1.0.dev250721.dist-info}/top_level.txt RENAMED Viewed

File without changes

tico 0.1.0.dev250717__py3-none-any.whl → 0.1.0.dev250721__py3-none-any.whl

tico 0.1.0.dev250717__py3-none-any.whl → 0.1.0.dev250721__py3-none-any.whl