tico 0.2.0.dev260511__tar.gz → 0.2.0.dev260512__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/PKG-INFO +1 -1
- tico-0.2.0.dev260512/tico/_version.py +1 -0
- tico-0.2.0.dev260512/tico/passes/remove_unused_placeholder.py +130 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/builders.py +23 -5
- tico-0.2.0.dev260512/tico/quantization/config/llama_attention.py +209 -0
- tico-0.2.0.dev260512/tico/quantization/passes/quantize_bias.py +145 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +129 -66
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +391 -90
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +32 -5
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_model.py +22 -1
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/convert.py +2 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/PKG-INFO +1 -1
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/SOURCES.txt +2 -1
- tico-0.2.0.dev260511/tico/_version.py +0 -1
- tico-0.2.0.dev260511/tico/quantization/passes/quantize_bias.py +0 -122
- tico-0.2.0.dev260511/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -257
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/LICENSE +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/README.md +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/pyproject.toml +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/setup.cfg +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/eliminate_rank_round_trip_region.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/cle.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/cle.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/mmlu_eval_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_attention_decode.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_attention_prefill.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/affine_base.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/__init__.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/utils.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/version.py +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260512"
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import torch.fx
|
|
19
|
+
|
|
20
|
+
import torch
|
|
21
|
+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
|
|
22
|
+
from torch.export import ExportedProgram
|
|
23
|
+
|
|
24
|
+
from tico.utils import logging
|
|
25
|
+
from tico.utils.passes import PassBase, PassResult
|
|
26
|
+
from tico.utils.trace_decorators import (
|
|
27
|
+
trace_const_diff_on_pass,
|
|
28
|
+
trace_graph_diff_on_pass,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _is_constant_placeholder(
|
|
33
|
+
exported_program: ExportedProgram,
|
|
34
|
+
node: "torch.fx.Node",
|
|
35
|
+
) -> bool:
|
|
36
|
+
"""
|
|
37
|
+
Return whether the given placeholder represents a lifted constant.
|
|
38
|
+
|
|
39
|
+
Parameters, buffers, and lifted tensor constants are treated as constant
|
|
40
|
+
placeholders because they are backed by ExportedProgram state instead of
|
|
41
|
+
runtime user inputs.
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
if node.op != "placeholder":
|
|
45
|
+
return False
|
|
46
|
+
|
|
47
|
+
return (
|
|
48
|
+
is_param(exported_program, node)
|
|
49
|
+
or is_buffer(exported_program, node)
|
|
50
|
+
or is_lifted_tensor_constant(exported_program, node)
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _remove_constant_placeholder(
|
|
55
|
+
exported_program: ExportedProgram,
|
|
56
|
+
node: "torch.fx.Node",
|
|
57
|
+
) -> None:
|
|
58
|
+
"""
|
|
59
|
+
Remove an unused constant placeholder from the graph and ExportedProgram state.
|
|
60
|
+
|
|
61
|
+
The graph signature is updated by the caller after all unused placeholders are
|
|
62
|
+
removed.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
signature = exported_program.graph_signature
|
|
66
|
+
|
|
67
|
+
if name := signature.inputs_to_parameters.get(node.name, None):
|
|
68
|
+
exported_program.state_dict.pop(name, None)
|
|
69
|
+
elif name := signature.inputs_to_lifted_tensor_constants.get(node.name, None):
|
|
70
|
+
exported_program.constants.pop(name, None)
|
|
71
|
+
elif name := signature.inputs_to_buffers.get(node.name, None):
|
|
72
|
+
exported_program.constants.pop(name, None)
|
|
73
|
+
exported_program.state_dict.pop(name, None)
|
|
74
|
+
|
|
75
|
+
exported_program.graph.erase_node(node)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@trace_graph_diff_on_pass
@trace_const_diff_on_pass
class RemoveUnusedPlaceholder(PassBase):
    """
    Prune constant placeholders that no node in the exported graph consumes.

    FX dead-code elimination does not remove placeholder nodes even when they have
    no users. This pass erases user-less placeholders that correspond to
    parameters, buffers, or lifted tensor constants, drops their backing entries
    from the ExportedProgram, and rebuilds the graph signature's input specs
    from the placeholders that remain.

    Runtime user input placeholders are never removed by this pass.
    """

    def __init__(self) -> None:
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        """
        Remove every unused constant placeholder from `exported_program`.

        Returns
        -------
        PassResult
            Always `PassResult(False)`, so the pass is not scheduled again.
        """
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph

        # Collect first, erase afterwards, so the node list is not mutated
        # while it is being scanned.
        dead_placeholders = []
        for candidate in graph.nodes:
            if not _is_constant_placeholder(exported_program, candidate):
                continue
            if candidate.users:
                continue
            dead_placeholders.append(candidate)

        if not dead_placeholders:
            return PassResult(False)

        # Names are captured before erasure for the debug message below.
        removed_names = [dead.name for dead in dead_placeholders]

        for dead in dead_placeholders:
            _remove_constant_placeholder(exported_program, dead)

        # Rebuild the input specs in graph order, keeping only the specs whose
        # placeholder still exists.
        signature = exported_program.graph_signature
        spec_by_name = {}
        for spec in signature.input_specs:
            spec_by_name[spec.arg.name] = spec
        surviving = (node for node in graph.nodes if node.op == "placeholder")
        signature.input_specs = [spec_by_name[node.name] for node in surviving]

        graph.lint()
        graph_module.recompile()

        logger.debug(f"Unused constant placeholders are removed: {removed_names}")

        # Run only once.
        return PassResult(False)
|
|
@@ -13,9 +13,13 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import copy
|
|
16
|
-
from dataclasses import dataclass, field
|
|
17
16
|
from typing import Any, Dict, Mapping, Optional, Tuple, Type
|
|
18
17
|
|
|
18
|
+
from tico.quantization.config.llama_attention import (
|
|
19
|
+
DEFAULT_EXECUTION_PROFILE,
|
|
20
|
+
ExecutionProfile,
|
|
21
|
+
normalize_execution_profile,
|
|
22
|
+
)
|
|
19
23
|
from tico.quantization.config.ptq import PTQConfig
|
|
20
24
|
from tico.quantization.config.utils import auto_qscheme_for
|
|
21
25
|
from tico.quantization.wrapq.dtypes import DType
|
|
@@ -336,6 +340,7 @@ def build_llm_ptq_config(
|
|
|
336
340
|
norm_weight_bits: Optional[int] = None,
|
|
337
341
|
norm_weight_dtype: Optional[DType] = None,
|
|
338
342
|
strict_wrap: bool = True,
|
|
343
|
+
profile: ExecutionProfile = DEFAULT_EXECUTION_PROFILE,
|
|
339
344
|
) -> PTQConfig:
|
|
340
345
|
"""
|
|
341
346
|
Build a PTQConfig for an LLM using model-family-aware override generation.
|
|
@@ -363,9 +368,7 @@ def build_llm_ptq_config(
|
|
|
363
368
|
explicit override.
|
|
364
369
|
default_observer : Type[ObserverBase], default=MinMaxObserver
|
|
365
370
|
Observer class to instantiate when no explicit observer is provided
|
|
366
|
-
|
|
367
|
-
This should be a subclass of `ObserverBase` (e.g., MinMaxObserver,
|
|
368
|
-
EMAObserver). The class itself (not an instance) must be passed.
|
|
371
|
+
through overrides.
|
|
369
372
|
linear_weight_bits : Optional[int], default=None
|
|
370
373
|
Convenience bit-width for decoder-layer linear projection weights.
|
|
371
374
|
Used only when `linear_weight_dtype` is not provided.
|
|
@@ -391,6 +394,12 @@ def build_llm_ptq_config(
|
|
|
391
394
|
strict_wrap : bool, default=True
|
|
392
395
|
If True, preparing a model will raise when a required module cannot be
|
|
393
396
|
wrapped.
|
|
397
|
+
profile : ExecutionProfile, default="npu_export"
|
|
398
|
+
Execution profile stored as `PTQConfig.model_args["profile"]`.
|
|
399
|
+
"reference_eval" selects a GPU-friendly, Hugging Face-like path.
|
|
400
|
+
"npu_export" preserves the existing NPU-export-oriented graph.
|
|
401
|
+
Advanced users may override or extend `qcfg.model_args` directly
|
|
402
|
+
before calling `prepare()`.
|
|
394
403
|
|
|
395
404
|
Returns
|
|
396
405
|
-------
|
|
@@ -402,6 +411,11 @@ def build_llm_ptq_config(
|
|
|
402
411
|
NotImplementedError
|
|
403
412
|
If the requested `model_type` is not supported.
|
|
404
413
|
"""
|
|
414
|
+
profile = normalize_execution_profile(
|
|
415
|
+
profile,
|
|
416
|
+
context="build_llm_ptq_config.profile",
|
|
417
|
+
)
|
|
418
|
+
|
|
405
419
|
resolved_linear_weight_dtype = _resolve_weight_dtype(
|
|
406
420
|
dtype=linear_weight_dtype,
|
|
407
421
|
bits=linear_weight_bits,
|
|
@@ -438,6 +452,7 @@ def build_llm_ptq_config(
|
|
|
438
452
|
default_qscheme=default_qscheme,
|
|
439
453
|
default_observer=default_observer,
|
|
440
454
|
overrides=overrides,
|
|
455
|
+
model_args={"profile": profile},
|
|
441
456
|
strict_wrap=strict_wrap,
|
|
442
457
|
)
|
|
443
458
|
|
|
@@ -448,7 +463,10 @@ def _build_qwen3_vl_norm_override(
|
|
|
448
463
|
norm_weight_dtype: Optional[DType],
|
|
449
464
|
) -> Dict[str, Any]:
|
|
450
465
|
"""
|
|
451
|
-
Build an override dictionary for Qwen3-VL norm modules
|
|
466
|
+
Build an override dictionary for Qwen3-VL norm modules.
|
|
467
|
+
|
|
468
|
+
The generated override covers both RMSNorm-style observers used by text
|
|
469
|
+
modules and LayerNorm-style observers used by vision modules.
|
|
452
470
|
|
|
453
471
|
Parameters
|
|
454
472
|
----------
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, fields, replace
|
|
16
|
+
from typing import Any, cast, Literal, Mapping, Optional
|
|
17
|
+
|
|
18
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Named execution profiles and the one used when nothing is configured.
ExecutionProfile = Literal["reference_eval", "npu_export"]
DEFAULT_EXECUTION_PROFILE: ExecutionProfile = "npu_export"
SUPPORTED_EXECUTION_PROFILES: tuple[ExecutionProfile, ...] = (
    "reference_eval",
    "npu_export",
)

# Allowed values for the individual Llama attention options.
ScaleFusion = Literal["none", "q_proj", "k_proj"]
RopeConvention = Literal["hf", "pre_negated_sin"]
AttentionLayout = Literal["batched", "unrolled"]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
class LlamaAttentionOptions:
    """
    Immutable implementation switches for quantized Llama attention wrappers.

    The options select how the attention graph is built; they do not express
    quantization policy. For that reason they are read from
    `PTQConfig.model_args` rather than `PTQConfig.overrides`.

    Attributes
    ----------
    scale_fusion : ScaleFusion
        Placement of the attention scale `1 / sqrt(head_dim)`. With "none"
        the scale is applied to the logits at runtime; with "q_proj" or
        "k_proj" it is folded into that projection's weights.
    rope : RopeConvention
        Sign convention of the rotary embedding. "hf" computes `rotate_half`
        as `(-x2, x1)` with normal sine values. "pre_negated_sin" expects the
        first half of the sine values to be pre-negated and uses `(x2, x1)`
        in the rotate-half operation.
    layout : AttentionLayout
        Shape of the attention implementation. "batched" is closer to the
        Hugging Face implementation and is preferable for GPU evaluation.
        "unrolled" preserves the NPU-export-friendly per-head loop.
    """

    # The defaults below coincide with the "npu_export" choices.
    scale_fusion: ScaleFusion = "k_proj"
    rope: RopeConvention = "pre_negated_sin"
    layout: AttentionLayout = "unrolled"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Baseline attention options for each execution profile. Per-option overrides
# are layered on top of these by the option resolver.
_PRESETS: dict[ExecutionProfile, LlamaAttentionOptions] = {
    # Runtime scaling, Hugging Face rope convention, batched layout.
    "reference_eval": LlamaAttentionOptions(
        layout="batched",
        rope="hf",
        scale_fusion="none",
    ),
    # Scale folded into k_proj, pre-negated sine, per-head unrolled layout.
    "npu_export": LlamaAttentionOptions(
        layout="unrolled",
        rope="pre_negated_sin",
        scale_fusion="k_proj",
    ),
}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def normalize_execution_profile(
    profile: Any,
    *,
    context: str = "profile",
) -> ExecutionProfile:
    """
    Check that `profile` names a supported execution profile and return it.

    Parameters
    ----------
    profile : Any
        Value supplied by the user.
    context : str
        Description of where the value came from; embedded in error messages.

    Returns
    -------
    ExecutionProfile
        The same value, typed as a validated profile.

    Raises
    ------
    TypeError
        If `profile` is not a string.
    ValueError
        If `profile` is a string but not one of the supported profiles.
    """
    if isinstance(profile, str):
        if profile in SUPPORTED_EXECUTION_PROFILES:
            # `cast` only narrows the static type; the value is returned as-is.
            return cast(ExecutionProfile, profile)
        raise ValueError(
            f"Unsupported execution profile at {context}: {profile!r}. "
            f"Supported profiles: {list(SUPPORTED_EXECUTION_PROFILES)}."
        )

    raise TypeError(f"{context} must be a string, got {type(profile).__name__}.")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def get_llama_attention_options(
    qcfg: Optional[PTQConfig],
) -> LlamaAttentionOptions:
    """
    Build the Llama attention options described by a PTQConfig.

    `model_args["profile"]` at the root of the config picks the default
    execution profile for all profile-aware wrappers. `model_args["attention"]`
    may replace that profile for the attention wrapper and/or override
    individual options of the chosen preset.

    Supported examples are::

        PTQConfig(..., model_args={"profile": "reference_eval"})

    and::

        PTQConfig(
            ...,
            model_args={
                "profile": "reference_eval",
                "attention": {
                    "layout": "unrolled",
                },
            },
        )

    `model_args["attention"]` may also be a plain profile string, for example
    "npu_export". When nothing is configured, the "npu_export" profile is
    used to preserve the existing export-oriented graph.

    Parameters
    ----------
    qcfg : Optional[PTQConfig]
        PTQ configuration associated with the wrapper. `None` selects the
        default profile's preset directly.

    Returns
    -------
    LlamaAttentionOptions
        Validated execution options.

    Raises
    ------
    TypeError
        If a profile value is not a string, or `model_args["attention"]` is
        neither a mapping, a string, nor None.
    ValueError
        If a profile or option value is unsupported, or an unknown option key
        is present.
    """
    if qcfg is None:
        return _PRESETS[DEFAULT_EXECUTION_PROFILE]

    root_profile = normalize_execution_profile(
        qcfg.get_model_arg("profile", DEFAULT_EXECUTION_PROFILE),
        context="PTQConfig.model_args['profile']",
    )

    # Normalize the accepted spellings (None / profile string / mapping) into
    # a private dict that can be mutated without touching the user's config.
    attention_arg = qcfg.get_model_arg("attention", {})
    if attention_arg is None:
        raw = {}
    elif isinstance(attention_arg, str):
        raw = {"profile": attention_arg}
    elif isinstance(attention_arg, Mapping):
        raw = dict(attention_arg)
    else:
        raise TypeError(
            "PTQConfig.model_args['attention'] must be a mapping, a string, or None."
        )

    # An attention-level profile takes precedence over the root-level one.
    profile = normalize_execution_profile(
        raw.pop("profile", root_profile),
        context="PTQConfig.model_args['attention']['profile']",
    )

    # Whatever is left must name a field of the options dataclass.
    known_fields = {spec.name for spec in fields(LlamaAttentionOptions)}
    unknown_keys = sorted(key for key in raw if key not in known_fields)
    if unknown_keys:
        raise ValueError(f"Unknown Llama attention option(s): {unknown_keys}.")

    # Layer the explicit overrides on top of the selected preset.
    options = replace(_PRESETS[profile], **raw)
    _validate_llama_attention_options(options)
    return options
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def is_npu_export_attention_options(options: LlamaAttentionOptions) -> bool:
    """
    Report whether `options` selects the NPU-export-friendly attention graph.

    That is the case only when the scale is fused into "k_proj", the rope
    convention is "pre_negated_sin", and the layout is "unrolled".
    """
    npu_export_choices = (
        ("scale_fusion", "k_proj"),
        ("rope", "pre_negated_sin"),
        ("layout", "unrolled"),
    )
    # `all` stops at the first mismatching option.
    return all(
        getattr(options, option_name) == expected
        for option_name, expected in npu_export_choices
    )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _validate_llama_attention_options(options: LlamaAttentionOptions) -> None:
    """
    Reject a resolved LlamaAttentionOptions holding an unsupported value.

    Fields are checked in the order scale_fusion, rope, layout; the first
    offending field raises.

    Raises
    ------
    ValueError
        If any field holds a value outside its allowed set.
    """
    allowed_scale_fusions = ("none", "q_proj", "k_proj")
    allowed_rope_conventions = ("hf", "pre_negated_sin")
    allowed_layouts = ("batched", "unrolled")

    if options.scale_fusion not in allowed_scale_fusions:
        raise ValueError(f"Unsupported scale_fusion: {options.scale_fusion!r}.")

    if options.rope not in allowed_rope_conventions:
        raise ValueError(f"Unsupported rope convention: {options.rope!r}.")

    if options.layout not in allowed_layouts:
        raise ValueError(f"Unsupported attention layout: {options.layout!r}.")
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Optional, Tuple, TYPE_CHECKING
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import torch.fx
|
|
19
|
+
|
|
20
|
+
import torch
|
|
21
|
+
from torch.export import ExportedProgram
|
|
22
|
+
|
|
23
|
+
from tico.serialize.quant_param import QPARAM_KEY, QuantParam, to_qparam_dtype
|
|
24
|
+
from tico.utils import logging
|
|
25
|
+
from tico.utils.graph import add_placeholder, get_torch_param_value, is_torch_param
|
|
26
|
+
from tico.utils.passes import PassBase, PassResult
|
|
27
|
+
from tico.utils.trace_decorators import trace_graph_diff_on_pass
|
|
28
|
+
from tico.utils.validate_args_kwargs import Conv2DArgs, LinearArgs
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_input_weight_bias_for_bias_quantization(
    node: "torch.fx.Node",
) -> Optional[Tuple["torch.fx.Node", "torch.fx.Node", "torch.fx.Node"]]:
    """
    Extract the (input, weight, bias) nodes of an operator with a quantizable bias.

    Supported targets are `aten.linear.default` and the `circle_custom.conv2d`
    variants. The tuple matches the common bias quantization rule, where the
    bias scale is derived from the input scale and the per-output-channel
    weight scale.

    Returns
    -------
    Optional[Tuple[torch.fx.Node, torch.fx.Node, torch.fx.Node]]
        The operand nodes, or None when the target is unsupported or the
        operator has no bias.
    """
    target = node.target

    if target == torch.ops.aten.linear.default:
        linear = LinearArgs(*node.args, **node.kwargs)
        operands = (linear.input, linear.weight, linear.bias)
    elif target in [
        torch.ops.circle_custom.conv2d,
        torch.ops.circle_custom.conv2d.padding,
    ]:
        conv = Conv2DArgs(*node.args, **node.kwargs)
        operands = (conv.input, conv.weight, conv.bias)
    else:
        # Not an operator this pass knows how to handle.
        return None

    _, _, bias = operands
    if bias is None:
        return None
    return operands
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@trace_graph_diff_on_pass
class QuantizeBias(PassBase):
    """
    Quantize fp32 biases of linear and conv2d operators.

    For every supported operator, the bias is quantized only when all of the
    following hold; otherwise the operator is left untouched:

    - the bias is recognized by `is_torch_param` and its value is float32,
    - both the input node and the weight node carry quantization parameters,
    - the input is quantized as "int16" (bias becomes int64) or "uint8"
      (bias becomes int32).

    The bias scale is `input_scale * weight_scale` per output channel and the
    zero point is 0. The quantized bias is added as a new placeholder and
    substituted for the original bias operand.
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        """
        Quantize eligible biases in `exported_program` in place.

        Returns
        -------
        PassResult
            Always `PassResult(False)`, so the pass is not scheduled again.
        """
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph
        for node in graph.nodes:
            if node.op != "call_function":
                continue

            op_args = _get_input_weight_bias_for_bias_quantization(node)
            if op_args is None:
                continue

            inp, weights, bias = op_args

            # Only support bias is Parameter.
            # TODO Is it possible that bias is not Parameter?
            if not is_torch_param(bias, exported_program):
                continue

            bias_val: torch.Tensor = get_torch_param_value(bias, exported_program)
            if bias_val.dtype != torch.float32:
                continue

            # The bias scale is derived from the input and weight scales, so
            # both operands must already be quantized.
            if QPARAM_KEY not in inp.meta:
                continue

            if QPARAM_KEY not in weights.meta:
                continue

            # The bias integer width follows the activation dtype.
            quant_dtype = None
            if inp.meta[QPARAM_KEY].dtype == "int16":
                quant_dtype = torch.int64
            elif inp.meta[QPARAM_KEY].dtype == "uint8":
                quant_dtype = torch.int32
            else:
                continue

            assert quant_dtype is not None
            type_info = torch.iinfo(quant_dtype)

            i_scale = inp.meta[QPARAM_KEY].scale
            w_scale = weights.meta[QPARAM_KEY].scale

            # Per-tensor input scale, one weight scale per bias element.
            assert i_scale is not None
            assert w_scale is not None
            assert len(i_scale) == 1
            assert len(w_scale) == bias_val.shape[0]

            bias_scale = torch.tensor(i_scale) * torch.tensor(w_scale)
            q_bias = torch.round(bias_val / bias_scale)
            q_bias = torch.clamp(q_bias, min=type_info.min, max=type_info.max)
            q_bias = q_bias.to(quant_dtype)

            q_bias_node = add_placeholder(exported_program, q_bias, bias.name)

            qparam = QuantParam()
            qparam.scale = bias_scale.tolist()
            assert qparam.scale is not None
            qparam.zero_point = [0] * len(qparam.scale)
            qparam.dtype = to_qparam_dtype(quant_dtype)
            qparam.quantized_dimension = 0
            q_bias_node.meta[QPARAM_KEY] = qparam

            # The operands were parsed from both args and kwargs, so the bias
            # may have been passed either way. It is the third positional
            # operand; otherwise it must be the "bias" keyword.
            if len(node.args) > 2:
                node.update_arg(2, q_bias_node)
            else:
                node.update_kwarg("bias", q_bias_node)

            logger.debug(f"Bias ({bias.name}) is quantized to {q_bias_node.name}.")

        graph.eliminate_dead_code()
        graph.lint()
        graph_module.recompile()

        # Run only once.
        return PassResult(False)
|