tico-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tico/__init__.py +42 -0
- tico/config/__init__.py +4 -0
- tico/config/base.py +37 -0
- tico/config/factory.py +41 -0
- tico/config/v1.py +35 -0
- tico/experimental/__init__.py +1 -0
- tico/experimental/quantization/__init__.py +1 -0
- tico/experimental/quantization/algorithm/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/__init__.py +1 -0
- tico/experimental/quantization/algorithm/gptq/gptq.py +172 -0
- tico/experimental/quantization/algorithm/gptq/quant.py +153 -0
- tico/experimental/quantization/algorithm/gptq/quantizer.py +225 -0
- tico/experimental/quantization/algorithm/gptq/utils.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/annotator.py +215 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/config.py +26 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/__init__.py +21 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +65 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/add.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/conv2d.py +92 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/div.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/linear.py +94 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mean.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/mul.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/relu6.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/rsqrt.py +53 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/op/sub.py +57 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/spec.py +47 -0
- tico/experimental/quantization/algorithm/pt2e/annotation/utils.py +88 -0
- tico/experimental/quantization/algorithm/pt2e/quantizer.py +78 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/__init__.py +1 -0
- tico/experimental/quantization/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +58 -0
- tico/experimental/quantization/algorithm/pt2e/utils.py +138 -0
- tico/experimental/quantization/algorithm/smoothquant/__init__.py +1 -0
- tico/experimental/quantization/algorithm/smoothquant/observer.py +78 -0
- tico/experimental/quantization/algorithm/smoothquant/quantizer.py +81 -0
- tico/experimental/quantization/algorithm/smoothquant/smooth_quant.py +164 -0
- tico/experimental/quantization/config.py +68 -0
- tico/experimental/quantization/evaluation/__init__.py +1 -0
- tico/experimental/quantization/evaluation/backend.py +20 -0
- tico/experimental/quantization/evaluation/evaluate.py +223 -0
- tico/experimental/quantization/evaluation/executor/__init__.py +1 -0
- tico/experimental/quantization/evaluation/executor/backend_executor.py +54 -0
- tico/experimental/quantization/evaluation/executor/circle_executor.py +75 -0
- tico/experimental/quantization/evaluation/executor/triv24_executor.py +128 -0
- tico/experimental/quantization/evaluation/metric.py +109 -0
- tico/experimental/quantization/evaluation/utils.py +185 -0
- tico/experimental/quantization/passes/__init__.py +1 -0
- tico/experimental/quantization/passes/fold_quant_ops.py +154 -0
- tico/experimental/quantization/passes/insert_quantize_on_dtype_mismatch.py +345 -0
- tico/experimental/quantization/passes/propagate_qparam_backward.py +91 -0
- tico/experimental/quantization/passes/propagate_qparam_forward.py +141 -0
- tico/experimental/quantization/passes/quantize_bias.py +123 -0
- tico/experimental/quantization/passes/remove_weight_dequant_op.py +177 -0
- tico/experimental/quantization/public_interface.py +108 -0
- tico/experimental/quantization/quantizer.py +71 -0
- tico/interpreter/__init__.py +1 -0
- tico/interpreter/infer.py +116 -0
- tico/interpreter/interpreter.py +93 -0
- tico/passes/__init__.py +1 -0
- tico/passes/cast_aten_where_arg_type.py +191 -0
- tico/passes/cast_mixed_type_args.py +187 -0
- tico/passes/const_prop_pass.py +307 -0
- tico/passes/convert_conv1d_to_conv2d.py +160 -0
- tico/passes/convert_layout_op_to_reshape.py +85 -0
- tico/passes/convert_repeat_to_expand_copy.py +89 -0
- tico/passes/convert_to_relu6.py +181 -0
- tico/passes/decompose_addmm.py +124 -0
- tico/passes/decompose_batch_norm.py +192 -0
- tico/passes/decompose_fake_quantize.py +134 -0
- tico/passes/decompose_fake_quantize_tensor_qparams.py +294 -0
- tico/passes/decompose_group_norm.py +275 -0
- tico/passes/decompose_grouped_conv2d.py +209 -0
- tico/passes/decompose_slice_scatter.py +169 -0
- tico/passes/extract_dtype_kwargs.py +122 -0
- tico/passes/fill_meta_val.py +57 -0
- tico/passes/fuse_leading_unsqueeze_reshape.py +112 -0
- tico/passes/fuse_redundant_reshape_to_mean.py +102 -0
- tico/passes/legalize_causal_mask_value.py +108 -0
- tico/passes/legalize_predefined_layout_operators.py +386 -0
- tico/passes/lower_pow2_to_mul.py +75 -0
- tico/passes/lower_to_resize_nearest_neighbor.py +235 -0
- tico/passes/lower_to_slice.py +230 -0
- tico/passes/merge_consecutive_cat.py +80 -0
- tico/passes/ops.py +78 -0
- tico/passes/remove_nop.py +84 -0
- tico/passes/remove_redundant_assert_nodes.py +51 -0
- tico/passes/remove_redundant_expand.py +66 -0
- tico/passes/remove_redundant_permute.py +122 -0
- tico/passes/remove_redundant_reshape.py +436 -0
- tico/passes/remove_redundant_slice.py +62 -0
- tico/passes/remove_redundant_to_copy.py +86 -0
- tico/passes/restore_linear.py +115 -0
- tico/passes/segment_index_select.py +145 -0
- tico/pt2_to_circle.py +105 -0
- tico/serialize/__init__.py +1 -0
- tico/serialize/circle_graph.py +319 -0
- tico/serialize/circle_mapping.py +177 -0
- tico/serialize/circle_serializer.py +240 -0
- tico/serialize/operators/__init__.py +28 -0
- tico/serialize/operators/hashable_opcode.py +43 -0
- tico/serialize/operators/node_visitor.py +80 -0
- tico/serialize/operators/op_abs.py +53 -0
- tico/serialize/operators/op_add.py +69 -0
- tico/serialize/operators/op_alias_copy.py +64 -0
- tico/serialize/operators/op_any.py +150 -0
- tico/serialize/operators/op_arange_start_step.py +61 -0
- tico/serialize/operators/op_argmax.py +62 -0
- tico/serialize/operators/op_avg_pool2d.py +192 -0
- tico/serialize/operators/op_bmm.py +62 -0
- tico/serialize/operators/op_cat.py +66 -0
- tico/serialize/operators/op_clamp.py +126 -0
- tico/serialize/operators/op_clone.py +71 -0
- tico/serialize/operators/op_constant_pad_nd.py +72 -0
- tico/serialize/operators/op_conv2d.py +186 -0
- tico/serialize/operators/op_copy.py +164 -0
- tico/serialize/operators/op_cos.py +59 -0
- tico/serialize/operators/op_cumsum.py +95 -0
- tico/serialize/operators/op_depthwise_conv2d.py +199 -0
- tico/serialize/operators/op_dequantize_per_channel.py +82 -0
- tico/serialize/operators/op_dequantize_per_tensor.py +64 -0
- tico/serialize/operators/op_div.py +62 -0
- tico/serialize/operators/op_embedding.py +60 -0
- tico/serialize/operators/op_eq.py +64 -0
- tico/serialize/operators/op_exp.py +60 -0
- tico/serialize/operators/op_expand.py +91 -0
- tico/serialize/operators/op_full.py +48 -0
- tico/serialize/operators/op_full_like.py +55 -0
- tico/serialize/operators/op_ge.py +54 -0
- tico/serialize/operators/op_gelu.py +59 -0
- tico/serialize/operators/op_gt.py +54 -0
- tico/serialize/operators/op_index.py +82 -0
- tico/serialize/operators/op_index_select.py +64 -0
- tico/serialize/operators/op_instance_norm.py +91 -0
- tico/serialize/operators/op_leaky_relu.py +60 -0
- tico/serialize/operators/op_linear.py +70 -0
- tico/serialize/operators/op_log.py +53 -0
- tico/serialize/operators/op_log1p.py +86 -0
- tico/serialize/operators/op_logical_and.py +63 -0
- tico/serialize/operators/op_logical_not.py +62 -0
- tico/serialize/operators/op_lt.py +61 -0
- tico/serialize/operators/op_max_dim.py +70 -0
- tico/serialize/operators/op_max_pool2d_with_indices.py +155 -0
- tico/serialize/operators/op_maximum.py +53 -0
- tico/serialize/operators/op_mean.py +66 -0
- tico/serialize/operators/op_minimum.py +53 -0
- tico/serialize/operators/op_mm.py +177 -0
- tico/serialize/operators/op_mul.py +99 -0
- tico/serialize/operators/op_ne.py +54 -0
- tico/serialize/operators/op_neg.py +59 -0
- tico/serialize/operators/op_permute.py +65 -0
- tico/serialize/operators/op_pow.py +141 -0
- tico/serialize/operators/op_prelu.py +54 -0
- tico/serialize/operators/op_quantize_per_tensor.py +79 -0
- tico/serialize/operators/op_reciprocal.py +64 -0
- tico/serialize/operators/op_relu.py +53 -0
- tico/serialize/operators/op_relu6.py +52 -0
- tico/serialize/operators/op_repeat.py +100 -0
- tico/serialize/operators/op_reshape.py +73 -0
- tico/serialize/operators/op_resize_nearest_neighbor.py +70 -0
- tico/serialize/operators/op_rsqrt.py +53 -0
- tico/serialize/operators/op_scalar_tensor.py +51 -0
- tico/serialize/operators/op_select_copy.py +65 -0
- tico/serialize/operators/op_sigmoid.py +56 -0
- tico/serialize/operators/op_sin.py +53 -0
- tico/serialize/operators/op_slice.py +155 -0
- tico/serialize/operators/op_softmax.py +100 -0
- tico/serialize/operators/op_split_with_sizes.py +99 -0
- tico/serialize/operators/op_sqrt.py +55 -0
- tico/serialize/operators/op_squeeze.py +73 -0
- tico/serialize/operators/op_sub.py +71 -0
- tico/serialize/operators/op_sum.py +63 -0
- tico/serialize/operators/op_tanh.py +54 -0
- tico/serialize/operators/op_to_copy.py +105 -0
- tico/serialize/operators/op_unsqueeze.py +66 -0
- tico/serialize/operators/op_view.py +74 -0
- tico/serialize/operators/op_where.py +82 -0
- tico/serialize/operators/utils.py +94 -0
- tico/serialize/pack.py +35 -0
- tico/serialize/quant_param.py +42 -0
- tico/utils/__init__.py +1 -0
- tico/utils/convert.py +296 -0
- tico/utils/define.py +35 -0
- tico/utils/diff_graph.py +181 -0
- tico/utils/errors.py +35 -0
- tico/utils/graph.py +282 -0
- tico/utils/logging.py +45 -0
- tico/utils/model.py +37 -0
- tico/utils/mx/__init__.py +1 -0
- tico/utils/mx/elemwise_ops.py +267 -0
- tico/utils/mx/formats.py +125 -0
- tico/utils/mx/mx_ops.py +270 -0
- tico/utils/padding.py +47 -0
- tico/utils/passes.py +76 -0
- tico/utils/register_custom_op.py +609 -0
- tico/utils/serialize.py +42 -0
- tico/utils/trace_decorators.py +101 -0
- tico/utils/utils.py +406 -0
- tico/utils/validate_args_kwargs.py +1149 -0
- tico-0.1.0.dist-info/LICENSE +241 -0
- tico-0.1.0.dist-info/METADATA +354 -0
- tico-0.1.0.dist-info/RECORD +206 -0
- tico-0.1.0.dist-info/WHEEL +5 -0
- tico-0.1.0.dist-info/entry_points.txt +3 -0
- tico-0.1.0.dist-info/top_level.txt +1 -0
tico/utils/graph.py
ADDED
@@ -0,0 +1,282 @@
# Portions of this file are adapted from code originally authored by
# Meta Platforms, Inc. and affiliates, licensed under the BSD-style
# license found in the LICENSE file in the root directory of their source tree.

# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Any, Dict, Optional, Tuple, TYPE_CHECKING

if TYPE_CHECKING:
    import torch.fx
import torch
from torch.export import ExportedProgram
from torch.export.exported_program import InputKind, InputSpec, TensorArgument

from tico.utils.utils import get_fake_mode, set_new_meta_val


def is_torch_param(node: torch.fx.Node, ep: ExportedProgram):
    assert node.op == "placeholder"

    return node.name in ep.graph_signature.inputs_to_parameters


def is_torch_buffer(node: torch.fx.Node, ep: ExportedProgram):
    assert node.op == "placeholder"

    return node.name in ep.graph_signature.inputs_to_buffers


def get_torch_param_value(node: torch.fx.Node, ep: ExportedProgram):
    assert isinstance(node, torch.fx.Node)
    assert node.op == "placeholder"
    assert (
        node.name in ep.graph_signature.inputs_to_parameters
    ), f"Node {node.name} is not in the parameters"  # FIX CALLER UNLESS

    param_name = ep.graph_signature.inputs_to_parameters[node.name]
    named_params = dict(ep.named_parameters())
    assert param_name in named_params

    return named_params[param_name].data


def get_torch_buffer_value(node: torch.fx.Node, ep: ExportedProgram):
    assert isinstance(node, torch.fx.Node)
    assert node.op == "placeholder"
    assert (
        node.name in ep.graph_signature.inputs_to_buffers
    ), f"Node {node.name} is not in the buffers"  # FIX CALLER UNLESS

    buf_name = ep.graph_signature.inputs_to_buffers[node.name]
    named_buf = dict(ep.named_buffers())
    assert buf_name in named_buf

    return named_buf[buf_name]


def get_first_user_input(exported_program: ExportedProgram) -> Optional[torch.fx.Node]:
    """Returns the first user input node in the graph."""
    first_user_input: Optional[torch.fx.Node] = None
    graph_module = exported_program.graph_module
    graph: torch.fx.Graph = graph_module.graph
    for node in graph.nodes:
        if (
            node.op == "placeholder"
            and node.name in exported_program.graph_signature.user_inputs
        ):
            first_user_input = node
            break

    return first_user_input


def generate_fqn(prefix: str, exported_program: ExportedProgram):
    """
    Generate a fully-qualified name for constants.

    This function prevents `exported_program.constants` from having duplicate keys.
    """
    cnt = len(exported_program.constants)
    while True:
        if f"{prefix}{cnt}" in exported_program.constants:
            cnt += 1
            continue
        break
    return f"{prefix}{cnt}"


def create_input_spec(node, input_kind: InputKind):
    """
    @ref https://pytorch.org/docs/stable/export.ir_spec.html#placeholder
    """
    if input_kind == InputKind.CONSTANT_TENSOR:
        return InputSpec(
            kind=InputKind.CONSTANT_TENSOR,
            arg=TensorArgument(name=node.name),
            target=node.target,  # type: ignore[arg-type]
            persistent=True,
        )
    else:
        raise NotImplementedError("NYI")


def validate_input_specs(exported_program):
    name_to_spec_dict = {
        s.arg.name: s for s in exported_program.graph_signature.input_specs
    }

    for node in exported_program.graph.nodes:
        if node.op != "placeholder":
            continue

        if node.name not in name_to_spec_dict:
            raise RuntimeError(
                f"Placeholder node {node.name} does not have a corresponding input spec!"
            )


def add_placeholder(
    exported_program: ExportedProgram,
    tensor: torch.Tensor,
    prefix: str,
) -> torch.fx.Node:
    """
    Add a placeholder to the graph and update the exported program.
    """
    fqn_name = generate_fqn(prefix, exported_program)

    # Get fake mode before adding placeholder
    fake_mode = get_fake_mode(exported_program)

    first_user_input = get_first_user_input(exported_program)
    if not first_user_input:
        # Placeholder nodes must be the first N nodes in the nodes list of a graph.
        # Therefore, insert the newly created placeholders at the start of the node list.
        assert exported_program.graph.nodes
        first_node = list(exported_program.graph.nodes)[0]
        first_user_input = first_node

    # Add a placeholder to the graph.
    with exported_program.graph.inserting_before(first_user_input):
        const_node = exported_program.graph.placeholder(fqn_name)

    const_node.meta["val"] = fake_mode.from_tensor(tensor, static_shapes=True)
    const_node.meta["val"].constant = tensor

    # Add a new constant to the exported program.
    exported_program.constants[const_node.name] = tensor

    # Use update (instead of append) if this assert is violated
    assert const_node.name not in [
        s.arg.name for s in exported_program.graph_signature.input_specs
    ]

    # Append the new input spec.
    exported_program.graph_signature.input_specs.append(
        create_input_spec(const_node, InputKind.CONSTANT_TENSOR)
    )

    # Get old input specs
    name_to_spec_dict = {
        s.arg.name: s for s in exported_program.graph_signature.input_specs
    }

    # Add the new constants to input specs dict.
    name_to_spec_dict.update(
        {const_node.name: create_input_spec(const_node, InputKind.CONSTANT_TENSOR)}
    )

    # Generate new input spec *in the same order of nodes*
    # IMPORTANT Input specs and their placeholder nodes must have the same order.
    new_input_specs = []
    for node in exported_program.graph.nodes:
        if node.op != "placeholder":
            continue
        new_input_specs.append(name_to_spec_dict[node.name])
    exported_program.graph_signature.input_specs = new_input_specs

    return const_node


def is_single_value_tensor(t: torch.Tensor):
    if len(t.size()) == 0:
        return True
    if len(t.size()) == 1 and t.size()[0] == 1:
        return True

    return False


def get_module_name_chain(node: Optional[torch.fx.Node]) -> str:
    """
    Returns a slash-separated string of module names representing the
    hierarchical path of the FX node within the original model.

    If the node has no `nn_module_stack` metadata, "unknown" is returned.

    Example:
        "encoder/layer1/linear"

    Parameters
    ----------
    node: torch.fx.Node
        A node from an ExportedProgram graph.

    Returns
    -------
    str
        A human-readable string that describes the full module path.
    """
    if node is None:
        return "unknown"
    # Let's prefix "tico" for graph inputs
    if node.op == "placeholder" and "nn_module_stack" not in node.meta:
        return "tico"

    assert isinstance(node, torch.fx.Node)
    stack = node.meta.get("nn_module_stack")
    if stack:
        assert isinstance(stack, dict)
        # Retrieving the last element is enough.
        return next(reversed(stack.values()))[1]
    else:
        return "unknown"


def create_node(
    graph: torch.fx.Graph,
    target: torch._ops.OpOverload,
    args: Optional[Tuple[Any, ...]] = None,
    kwargs: Optional[Dict[str, Any]] = None,
    *,
    origin: Optional[torch.fx.Node] = None,
) -> torch.fx.Node:
    """
    Insert a new node into graph and propagate metadata from *origin*.

    Parameters
    ----------
    graph : torch.fx.Graph
        The graph that will own the newly-created node.

    target : torch._ops.OpOverload
        The op to call (e.g. `torch.add` or "call_function" target).

    args : Tuple[Any, ...], optional
        Positional arguments for the new node.

    kwargs : Dict[str, Any], optional
        Keyword arguments for the new node.

    origin : torch.fx.Node, optional
        If given, every key in `origin.meta` **except** "val" is copied
        onto the new node. "val" is recomputed from *args* / *kwargs* using
        the internal meta-inference helper.

    Returns
    -------
    torch.fx.Node
        The freshly inserted node with fully-populated `.meta`.
    """
    new_node = graph.call_function(target, args=args, kwargs=kwargs)
    if origin:
        assert isinstance(origin, torch.fx.Node), type(origin)
        # Propagate "nn_module_stack" to retain the originating module context
        # for meaningful node names.
        if "nn_module_stack" in origin.meta:
            new_node.meta["nn_module_stack"] = origin.meta["nn_module_stack"]

    return new_node
tico/utils/logging.py
ADDED
@@ -0,0 +1,45 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os


def _loggerLevel():
    TICO_LOG = os.environ.get("TICO_LOG")
    if TICO_LOG == "1":
        log_level = logging.FATAL
    elif TICO_LOG == "2":
        log_level = logging.WARNING
    elif TICO_LOG == "3":
        log_level = logging.INFO
    elif TICO_LOG == "4":
        log_level = logging.DEBUG
    else:
        log_level = logging.WARNING
    return log_level


LOG_LEVEL = _loggerLevel()


def getLogger(name: str):
    """
    Get logger with setting log level according to the `TICO_LOG` environment variable.
    """
    logging.basicConfig()
    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)

    return logger
tico/utils/model.py
ADDED
@@ -0,0 +1,37 @@
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Any

from tico.interpreter import infer


class CircleModel:
    def __init__(self, circle_binary: bytes):
        self.circle_binary = circle_binary

    def __call__(self, *args: Any, **kwargs: Any) -> Any:
        return infer.infer(self.circle_binary, *args, **kwargs)

    @staticmethod
    def load(circle_path: str) -> CircleModel:
        with open(circle_path, "rb") as f:
            buf = bytes(f.read())
        return CircleModel(buf)

    def save(self, circle_path: str) -> None:
        with open(circle_path, "wb") as f:
            f.write(self.circle_binary)
tico/utils/mx/__init__.py
ADDED
@@ -0,0 +1 @@
# DO NOT REMOVE THIS FILE
tico/utils/mx/elemwise_ops.py
ADDED
@@ -0,0 +1,267 @@
"""
Copyright (c) Microsoft Corporation.
Licensed under the MIT License.

Name: elemwise_ops.py

Pytorch functions for elementwise (i.e. bfloat) quantization.

Usage Notes:
 - Use the "Exposed Methods" below to implement autograd functions
 - Use autograd functions to then implement torch.nn.Module(s)
 - Do *not* use methods in this file in Modules, they have no defined
   backwards pass and will block gradient computation.
 - Avoid importing internal functions if at all possible.

Exposed Methods:
    quantize_elemwise_op - quantizes a tensor to bfloat or other
                           custom float format
"""
import torch

from .formats import RoundingMode, _get_format_params
from .formats import _get_min_norm, _get_max_norm


# -------------------------------------------------------------------------
# Helper funcs
# -------------------------------------------------------------------------
# Never explicitly compute 2**(-exp) since subnorm numbers have
# exponents smaller than -126
def _safe_lshift(x, bits, exp):
    if exp is None:
        return x * (2**bits)
    else:
        return x / (2**exp) * (2**bits)


def _safe_rshift(x, bits, exp):
    if exp is None:
        return x / (2**bits)
    else:
        return x / (2**bits) * (2**exp)


def _round_mantissa(A, bits, round, clamp=False):
    """
    Rounds mantissa to nearest bits depending on the rounding method 'round'
    Args:
        A     {PyTorch tensor} -- Input tensor
        bits  {int}            -- Number of mantissa bits
        round {str}            -- Rounding method
                                  "floor" rounds to the floor
                                  "nearest" rounds to ceil or floor, whichever is nearest
    Returns:
        A {PyTorch tensor} -- Tensor with mantissas rounded
    """

    if round == "dither":
        rand_A = torch.rand_like(A, requires_grad=False)
        A = torch.sign(A) * torch.floor(torch.abs(A) + rand_A)
    elif round == "floor":
        A = torch.sign(A) * torch.floor(torch.abs(A))
    elif round == "nearest":
        A = torch.sign(A) * torch.floor(torch.abs(A) + 0.5)
    elif round == "even":
        absA = torch.abs(A)
        # find 0.5, 2.5, 4.5 ...
        maskA = ((absA - 0.5) % 2 == torch.zeros_like(A)).type(A.dtype)
        A = torch.sign(A) * (torch.floor(absA + 0.5) - maskA)
    else:
        raise Exception("Unrecognized round method %s" % (round))

    # Clip values that cannot be expressed by the specified number of bits
    if clamp:
        max_mantissa = 2 ** (bits - 1) - 1
        A = torch.clamp(A, -max_mantissa, max_mantissa)
    return A


# -------------------------------------------------------------------------
# Main funcs
# -------------------------------------------------------------------------
def _quantize_elemwise_core(A, bits, exp_bits, max_norm, round='nearest',
                            saturate_normals=False, allow_denorm=True,
                            custom_cuda=False):
    """ Core function used for element-wise quantization
    Arguments:
      A                {PyTorch tensor} -- A tensor to be quantized
      bits             {int}            -- Number of mantissa bits. Includes
                                           sign bit and implicit one for floats
      exp_bits         {int}            -- Number of exponent bits, 0 for ints
      max_norm         {float}          -- Largest representable normal number
      round            {str}            -- Rounding mode: (floor, nearest, even)
      saturate_normals {bool}           -- If True, normal numbers (i.e., not NaN/Inf)
                                           that exceed max norm are clamped.
                                           Must be True for correct MX conversion.
      allow_denorm     {bool}           -- If False, flush denorm numbers in the
                                           elem_format to zero.
      custom_cuda      {bool}           -- If True, use custom CUDA kernels
    Returns:
      quantized tensor {PyTorch tensor} -- A tensor that has been quantized
    """
    A_is_sparse = A.is_sparse
    if A_is_sparse:
        if A.layout != torch.sparse_coo:
            raise NotImplementedError("Only COO layout sparse tensors are currently supported.")

        sparse_A = A.coalesce()
        A = sparse_A.values().clone()

    # custom cuda only supports floor and nearest rounding modes
    custom_cuda = custom_cuda and round in RoundingMode.string_enums()

    if custom_cuda:
        A = A.contiguous()

        from . import custom_extensions
        if A.device.type == "cuda":
            A = custom_extensions.funcs.quantize_elemwise_func_cuda(
                A, bits, exp_bits, max_norm, RoundingMode[round],
                saturate_normals, allow_denorm)
        elif A.device.type == "cpu":
            A = custom_extensions.funcs.quantize_elemwise_func_cpp(
                A, bits, exp_bits, max_norm, RoundingMode[round],
                saturate_normals, allow_denorm)
        return A

    # Flush values < min_norm to zero if denorms are not allowed
    if not allow_denorm and exp_bits > 0:
        min_norm = _get_min_norm(exp_bits)
        out = (torch.abs(A) >= min_norm).type(A.dtype) * A
    else:
        out = A

    if exp_bits != 0:
        private_exp = torch.floor(torch.log2(
            torch.abs(A) + (A == 0).type(A.dtype)))

        # The minimum representable exponent for 8 exp bits is -126
        min_exp = -(2**(exp_bits - 1)) + 2
        private_exp = private_exp.clip(min=min_exp)
    else:
        private_exp = None

    # Scale up so appropriate number of bits are in the integer portion of the number
    out = _safe_lshift(out, bits - 2, private_exp)

    out = _round_mantissa(out, bits, round, clamp=False)

    # Undo scaling
    out = _safe_rshift(out, bits - 2, private_exp)

    # Set values > max_norm to Inf if desired, else clamp them
    if saturate_normals or exp_bits == 0:
        out = torch.clamp(out, min=-max_norm, max=max_norm)
    else:
        out = torch.where((torch.abs(out) > max_norm),
                          torch.sign(out) * float("Inf"), out)

    # handle Inf/NaN
    if not custom_cuda:
        out[A == float("Inf")] = float("Inf")
        out[A == -float("Inf")] = -float("Inf")
        # NaN never compares equal to itself, so use isnan rather than ==
        out[torch.isnan(A)] = float("NaN")

    if A_is_sparse:
        # Rebuild the sparse tensor around the quantized values
        out = torch.sparse_coo_tensor(sparse_A.indices(), out,
                                      sparse_A.size(), dtype=sparse_A.dtype,
                                      device=sparse_A.device,
                                      requires_grad=sparse_A.requires_grad)

    return out


def _quantize_elemwise(A, elem_format, round='nearest', custom_cuda=False,
                       saturate_normals=False, allow_denorm=True):
    """ Quantize values to a defined format. See _quantize_elemwise_core()
    """
    if elem_format is None:
        return A

    ebits, mbits, _, max_norm, _ = _get_format_params(elem_format)

    output = _quantize_elemwise_core(
        A, mbits, ebits, max_norm,
        round=round, allow_denorm=allow_denorm,
        saturate_normals=saturate_normals,
        custom_cuda=custom_cuda)

    return output


def _quantize_bfloat(A, bfloat, round='nearest', custom_cuda=False, allow_denorm=True):
    """ Quantize values to bfloatX format
    Arguments:
      bfloat {int} -- Total number of bits for bfloatX format.
                      Includes 1 sign, 8 exp bits, and variable
                      mantissa bits. Must be >= 9.
    """
    # Shortcut for no quantization
    if bfloat == 0 or bfloat == 32:
        return A

    max_norm = _get_max_norm(8, bfloat - 7)

    return _quantize_elemwise_core(
        A, bits=bfloat - 7, exp_bits=8, max_norm=max_norm, round=round,
        allow_denorm=allow_denorm, custom_cuda=custom_cuda)


def _quantize_fp(A, exp_bits=None, mantissa_bits=None,
                 round='nearest', custom_cuda=False, allow_denorm=True):
    """ Quantize values to IEEE fpX format. The format defines NaN/Inf
    and subnorm numbers in the same way as FP32 and FP16.
    Arguments:
      exp_bits      {int} -- number of bits used to store exponent
      mantissa_bits {int} -- number of bits used to store mantissa, not
                             including sign or implicit 1
      round         {str} -- Rounding mode, (floor, nearest, even)
    """
    # Shortcut for no quantization
    if exp_bits is None or mantissa_bits is None:
        return A

    max_norm = _get_max_norm(exp_bits, mantissa_bits + 2)

    output = _quantize_elemwise_core(
        A, bits=mantissa_bits + 2, exp_bits=exp_bits,
        max_norm=max_norm, round=round, allow_denorm=allow_denorm,
        custom_cuda=custom_cuda)

    return output


def quantize_elemwise_op(A, mx_specs, round=None):
    """A function used for element-wise quantization with mx_specs
    Arguments:
      A        {PyTorch tensor} -- a tensor that needs to be quantized
      mx_specs {dictionary}     -- dictionary to specify mx_specs
      round    {str}            -- Rounding mode, choose from (floor, nearest, even)
                                   (default: "nearest")
    Returns:
      quantized value {PyTorch tensor} -- a tensor that has been quantized
    """
    if mx_specs is None:
        return A
    elif round is None:
        round = mx_specs['round']

    if mx_specs['bfloat'] == 16 and round == 'even' \
            and torch.cuda.is_bf16_supported() \
            and mx_specs['bfloat_subnorms'] == True:
        return A.to(torch.bfloat16)

    if mx_specs['bfloat'] > 0 and mx_specs['fp'] > 0:
        raise ValueError("Cannot set both [bfloat] and [fp] in mx_specs.")
    elif mx_specs['bfloat'] > 9:
        A = _quantize_bfloat(A, bfloat=mx_specs['bfloat'], round=round,
                             custom_cuda=mx_specs['custom_cuda'],
                             allow_denorm=mx_specs['bfloat_subnorms'])
    elif mx_specs['bfloat'] > 0 and mx_specs['bfloat'] <= 9:
        raise ValueError("Cannot set [bfloat] <= 9 in mx_specs.")
    elif mx_specs['fp'] > 6:
        A = _quantize_fp(A, exp_bits=5, mantissa_bits=mx_specs['fp'] - 6,
                         round=round, custom_cuda=mx_specs['custom_cuda'],
                         allow_denorm=mx_specs['bfloat_subnorms'])
    elif mx_specs['fp'] > 0 and mx_specs['fp'] <= 6:
        raise ValueError("Cannot set [fp] <= 6 in mx_specs.")
    return A