tico 0.2.0.dev260415__tar.gz → 0.2.0.dev260416__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/PKG-INFO +1 -1
- tico-0.2.0.dev260416/tico/_version.py +1 -0
- tico-0.2.0.dev260416/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +108 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +46 -8
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/affine_base.py +14 -18
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_model.py +1 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +2 -4
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/PKG-INFO +1 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/SOURCES.txt +1 -0
- tico-0.2.0.dev260415/tico/_version.py +0 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/LICENSE +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/README.md +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/pyproject.toml +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/setup.cfg +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/builders.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/quantize_bias.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/adapters/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/compat/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/convert.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/mx/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260416"
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import copy
|
|
17
|
+
import sys
|
|
18
|
+
|
|
19
|
+
import torch
|
|
20
|
+
import torch.nn as nn
|
|
21
|
+
|
|
22
|
+
import tico
|
|
23
|
+
import tico.quantization
|
|
24
|
+
import tico.quantization.config.ptq
|
|
25
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
26
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
torch.manual_seed(123)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_calibration_data(
|
|
33
|
+
num_batches: int,
|
|
34
|
+
batch_size: int,
|
|
35
|
+
normalized_shape: tuple,
|
|
36
|
+
) -> list:
|
|
37
|
+
"""Generate calibration data for PTQ"""
|
|
38
|
+
calibration_data = []
|
|
39
|
+
for i in range(num_batches):
|
|
40
|
+
x = torch.randn(batch_size, *normalized_shape)
|
|
41
|
+
calibration_data.append(x)
|
|
42
|
+
return calibration_data
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def main():
|
|
46
|
+
# Create LayerNorm model
|
|
47
|
+
# Using a common configuration for transformer models
|
|
48
|
+
normalized_shape = (768,) # Hidden dimension size
|
|
49
|
+
model = nn.LayerNorm(
|
|
50
|
+
normalized_shape=normalized_shape,
|
|
51
|
+
eps=1e-5,
|
|
52
|
+
elementwise_affine=True,
|
|
53
|
+
)
|
|
54
|
+
orig_model = copy.deepcopy(model)
|
|
55
|
+
model.eval()
|
|
56
|
+
|
|
57
|
+
# Generate calibration data
|
|
58
|
+
# Input shape: (batch_size, *normalized_shape)
|
|
59
|
+
# Example: (10, 768) - 10 samples, 768 features
|
|
60
|
+
batch_size = 10
|
|
61
|
+
calibration_data = generate_calibration_data(
|
|
62
|
+
num_batches=5,
|
|
63
|
+
batch_size=batch_size,
|
|
64
|
+
normalized_shape=normalized_shape,
|
|
65
|
+
)
|
|
66
|
+
example_input = calibration_data[0]
|
|
67
|
+
|
|
68
|
+
# Configure PTQ
|
|
69
|
+
ptq_config = tico.quantization.config.ptq.PTQConfig()
|
|
70
|
+
|
|
71
|
+
# Prepare the model for quantization
|
|
72
|
+
prepared_model = tico.quantization.prepare(
|
|
73
|
+
model, ptq_config, inplace=True # Transform the model in place
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Calibrate the model (collect statistics)
|
|
77
|
+
with torch.no_grad():
|
|
78
|
+
for i, batch in enumerate(calibration_data):
|
|
79
|
+
prepared_model(batch)
|
|
80
|
+
|
|
81
|
+
# Convert to quantized model
|
|
82
|
+
quantized_model = tico.quantization.convert(prepared_model, inplace=True)
|
|
83
|
+
|
|
84
|
+
# Compute PEIR (Peak Error-to-Interval Ratio) between quantized model and original model
|
|
85
|
+
with torch.no_grad():
|
|
86
|
+
quant_out = quantized_model(example_input)
|
|
87
|
+
fp_out = orig_model(example_input)
|
|
88
|
+
|
|
89
|
+
print(f"Input shape: {example_input.shape}")
|
|
90
|
+
print(f"Output shape (FP32): {fp_out.shape}")
|
|
91
|
+
print(f"Output shape (Quantized): {quant_out.shape}")
|
|
92
|
+
print(f"┌───────────── Quantization Error Summary ─────────────")
|
|
93
|
+
print(f"│ Mean |diff|: {(quant_out - fp_out).abs().mean().item():.6f}")
|
|
94
|
+
print(f"│ PEIR : {compute_peir(fp_out, quant_out) * 100:.6f} %")
|
|
95
|
+
print(f"└──────────────────────────────────────────────────────")
|
|
96
|
+
print(plot_two_outputs(fp_out, quant_out))
|
|
97
|
+
|
|
98
|
+
# Convert to Circle format
|
|
99
|
+
circle_model = tico.convert(quantized_model.eval(), (example_input,))
|
|
100
|
+
|
|
101
|
+
# Save the Circle model
|
|
102
|
+
filename = "quantized_layernorm.circle"
|
|
103
|
+
circle_model.save(filename)
|
|
104
|
+
print(f"Circle model saved as '{filename}'")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
main()
|
|
@@ -72,26 +72,59 @@ def inject_gptq_qparams(
|
|
|
72
72
|
root: torch.nn.Module,
|
|
73
73
|
gptq_quantizers: dict[str, Any], # {fp_name: quantizer}
|
|
74
74
|
weight_obs_name: str = "weight",
|
|
75
|
+
*,
|
|
76
|
+
verbose: bool = False,
|
|
75
77
|
):
|
|
76
78
|
"""
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
Inject GPTQ (scale, zero-point) into PTQ observers.
|
|
80
|
+
|
|
81
|
+
When verbose=True, prints a summary of matched / missed / unused entries.
|
|
80
82
|
"""
|
|
83
|
+
seen = set()
|
|
84
|
+
missed_modules = []
|
|
85
|
+
|
|
81
86
|
for m in root.modules():
|
|
82
87
|
if not isinstance(m, QuantModuleBase):
|
|
83
88
|
continue
|
|
84
89
|
if m.fp_name is None:
|
|
85
90
|
continue
|
|
91
|
+
|
|
86
92
|
quantizer = gptq_quantizers.get(m.fp_name)
|
|
87
|
-
if quantizer is None:
|
|
88
|
-
continue
|
|
89
93
|
obs = m.get_observer(weight_obs_name)
|
|
94
|
+
|
|
95
|
+
# Only care about modules that should have weight observers
|
|
90
96
|
if obs is None:
|
|
91
97
|
continue
|
|
98
|
+
|
|
99
|
+
if quantizer is None:
|
|
100
|
+
missed_modules.append(m.fp_name)
|
|
101
|
+
continue
|
|
102
|
+
|
|
92
103
|
assert isinstance(obs, AffineObserverBase)
|
|
93
|
-
# GPTQ quantizer attributes
|
|
94
104
|
obs.load_qparams(quantizer.scale, quantizer.zero, lock=True)
|
|
105
|
+
seen.add(m.fp_name)
|
|
106
|
+
|
|
107
|
+
unused = set(gptq_quantizers.keys()) - seen
|
|
108
|
+
|
|
109
|
+
if verbose:
|
|
110
|
+
print("\n[GPTQ → PTQ injection summary]")
|
|
111
|
+
print(f" matched : {len(seen)}")
|
|
112
|
+
print(f" missed : {len(missed_modules)}")
|
|
113
|
+
print(f" unused : {len(unused)}")
|
|
114
|
+
|
|
115
|
+
# Print samples (not all, to avoid spam)
|
|
116
|
+
def _print_sample(title, items):
|
|
117
|
+
items = list(items)
|
|
118
|
+
if not items:
|
|
119
|
+
return
|
|
120
|
+
print(f"\n {title}:")
|
|
121
|
+
for name in items[:10]:
|
|
122
|
+
print(f" - {name}")
|
|
123
|
+
if len(items) > 10:
|
|
124
|
+
print(f" ... and {len(items) - 10} more")
|
|
125
|
+
|
|
126
|
+
_print_sample("missed modules", missed_modules)
|
|
127
|
+
_print_sample("unused GPTQ entries", unused)
|
|
95
128
|
|
|
96
129
|
|
|
97
130
|
# -------------------------------------------------------------------------
|
|
@@ -172,13 +205,13 @@ def quantize_using_PTQ(q_m, calib_inputs, args):
|
|
|
172
205
|
|
|
173
206
|
# Overwrite weight observers with GPTQ statistics
|
|
174
207
|
if hasattr(q_m, "quantizers") and isinstance(q_m.quantizers, dict):
|
|
175
|
-
inject_gptq_qparams(q_m, q_m.quantizers)
|
|
208
|
+
inject_gptq_qparams(q_m, q_m.quantizers, verbose=args.verbose)
|
|
176
209
|
elif (
|
|
177
210
|
hasattr(q_m, "wrapped")
|
|
178
211
|
and hasattr(q_m.wrapped, "quantizers")
|
|
179
212
|
and isinstance(q_m.wrapped.quantizers, dict)
|
|
180
213
|
):
|
|
181
|
-
inject_gptq_qparams(q_m.wrapped, q_m.wrapped.quantizers)
|
|
214
|
+
inject_gptq_qparams(q_m.wrapped, q_m.wrapped.quantizers, verbose=args.verbose)
|
|
182
215
|
else:
|
|
183
216
|
print(
|
|
184
217
|
"[Warn] q_m.quantizers not found or not a dict; skipping GPTQ qparam injection."
|
|
@@ -376,6 +409,11 @@ def main():
|
|
|
376
409
|
type=str,
|
|
377
410
|
default=None,
|
|
378
411
|
)
|
|
412
|
+
parser.add_argument(
|
|
413
|
+
"--verbose",
|
|
414
|
+
action="store_true",
|
|
415
|
+
help="Verbose logging for debugging (e.g., GPTQ injection coverage)",
|
|
416
|
+
)
|
|
379
417
|
args = parser.parse_args()
|
|
380
418
|
print(args)
|
|
381
419
|
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/observers/affine_base.py
RENAMED
|
@@ -106,26 +106,22 @@ class AffineObserverBase(ObserverBase):
|
|
|
106
106
|
self._cached_scale, self._cached_zp = scale, zp
|
|
107
107
|
return scale, zp
|
|
108
108
|
|
|
109
|
-
if self.channel_axis is None:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
zp = torch.zeros_like(C, dtype=torch.int)
|
|
118
|
-
else:
|
|
119
|
-
scale = torch.clamp(C.abs(), min=eps)
|
|
120
|
-
zp = torch.full_like(C, qmax, dtype=torch.int)
|
|
109
|
+
if (self.channel_axis is None) and torch.all(rng.abs() < 1e-8):
|
|
110
|
+
C = self.min_val
|
|
111
|
+
if torch.allclose(C, torch.zeros_like(C)):
|
|
112
|
+
scale = torch.ones_like(C)
|
|
113
|
+
zp = torch.zeros_like(C, dtype=torch.int)
|
|
114
|
+
elif (C > 0).all():
|
|
115
|
+
scale = torch.clamp(C, min=eps)
|
|
116
|
+
zp = torch.zeros_like(C, dtype=torch.int)
|
|
121
117
|
else:
|
|
122
|
-
scale = torch.clamp(
|
|
123
|
-
zp = (
|
|
124
|
-
torch.round(qmin - self.min_val / scale)
|
|
125
|
-
.clamp(qmin, qmax)
|
|
126
|
-
.to(torch.int)
|
|
127
|
-
)
|
|
118
|
+
scale = torch.clamp(C.abs(), min=eps)
|
|
119
|
+
zp = torch.full_like(C, qmax, dtype=torch.int)
|
|
128
120
|
else:
|
|
121
|
+
# Force the range to include 0
|
|
122
|
+
rng = torch.where(0 < self.min_val, self.max_val, rng)
|
|
123
|
+
rng = torch.where(0 > self.max_val, -self.min_val, rng)
|
|
124
|
+
|
|
129
125
|
scale = torch.clamp(rng, min=eps) / (qmax - qmin)
|
|
130
126
|
zp = (
|
|
131
127
|
torch.round(qmin - self.min_val / scale).clamp(qmin, qmax).to(torch.int)
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/wrappers/llama/quant_model.py
RENAMED
|
@@ -78,7 +78,7 @@ class QuantLlamaModel(QuantModuleBase):
|
|
|
78
78
|
|
|
79
79
|
new_list = nn.ModuleList()
|
|
80
80
|
for idx, layer in enumerate(model_fp.layers):
|
|
81
|
-
child_scope = f"{idx}"
|
|
81
|
+
child_scope = f"{fp_name}.layers.{idx}"
|
|
82
82
|
child_cfg = layers_cfg.child(child_scope) if layers_cfg is not None else None # type: ignore[union-attr]
|
|
83
83
|
new_list.append(
|
|
84
84
|
PTQWrapper(
|
|
@@ -54,12 +54,10 @@ class QuantLlamaForCausalLM(QuantModuleBase):
|
|
|
54
54
|
model_fp.lm_head, torch.nn.Module
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
self.model = PTQWrapper(
|
|
58
|
-
model_fp.model, qcfg=model_cfg, fp_name=f"{fp_name}.model"
|
|
59
|
-
)
|
|
57
|
+
self.model = PTQWrapper(model_fp.model, qcfg=model_cfg, fp_name=f"model")
|
|
60
58
|
|
|
61
59
|
self.lm_head = PTQWrapper(
|
|
62
|
-
model_fp.lm_head, qcfg=lm_head_cfg, fp_name=f"
|
|
60
|
+
model_fp.lm_head, qcfg=lm_head_cfg, fp_name=f"lm_head"
|
|
63
61
|
)
|
|
64
62
|
|
|
65
63
|
# `rotate_lm_head` exists only for SpinQuant-style custom models.
|
|
@@ -139,6 +139,7 @@ tico/quantization/wrapq/examples/llama/quantize_mlp.py
|
|
|
139
139
|
tico/quantization/wrapq/examples/nn/__init__.py
|
|
140
140
|
tico/quantization/wrapq/examples/nn/quantize_conv3d.py
|
|
141
141
|
tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py
|
|
142
|
+
tico/quantization/wrapq/examples/nn/quantize_layernorm.py
|
|
142
143
|
tico/quantization/wrapq/examples/nn/quantize_linear.py
|
|
143
144
|
tico/quantization/wrapq/examples/qwen/__init__.py
|
|
144
145
|
tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.2.0.dev260415"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/convert_sym_size_to_circle_shape.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/decompose_fake_quantize_tensor_qparams.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/legalize_predefined_layout_operators.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/passes/lower_to_resize_nearest_neighbor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/fpi_gptq/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/observer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/smoothquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/algorithm/spinquant/spin_llama.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/executor/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/script/llm_tasks_eval.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/script/mini_vqa_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/evaluation/vlm_eval_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/propagate_qparam_backward.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/propagate_qparam_forward.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/passes/remove_weight_dequant_op.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/compare_ppl.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260416}/tico/quantization/wrapq/examples/llama/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|