tico 0.2.0.dev260415__tar.gz → 0.2.0.dev260417__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/PKG-INFO +1 -1
- tico-0.2.0.dev260417/tico/_version.py +1 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/gptq/quantizer.py +93 -0
- tico-0.2.0.dev260417/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +108 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +46 -8
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/affine_base.py +14 -18
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_model.py +1 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +2 -4
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/PKG-INFO +1 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/SOURCES.txt +1 -0
- tico-0.2.0.dev260415/tico/_version.py +0 -1
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/LICENSE +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/README.md +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/pyproject.toml +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/setup.cfg +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/builders.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/quantize_bias.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/adapters/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/compat/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/convert.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/mx/__init__.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/utils.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260417"
|
|
@@ -361,6 +361,13 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
361
361
|
if torch.cuda.is_available():
|
|
362
362
|
torch.cuda.empty_cache()
|
|
363
363
|
|
|
364
|
+
if (
|
|
365
|
+
hasattr(model, "model")
|
|
366
|
+
and hasattr(model.model, "norm")
|
|
367
|
+
and hasattr(model, "lm_head")
|
|
368
|
+
): # quantize lm_head
|
|
369
|
+
self._quantize_lm_head(model, quantizers)
|
|
370
|
+
|
|
364
371
|
# Restore the original cache configuration.
|
|
365
372
|
if orig_use_cache is not None:
|
|
366
373
|
model.config.use_cache = orig_use_cache
|
|
@@ -373,3 +380,89 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
373
380
|
model.quantizers = quantizers
|
|
374
381
|
|
|
375
382
|
return model
|
|
383
|
+
|
|
384
|
+
def _quantize_lm_head(self, model, quantizers):
|
|
385
|
+
gptq_conf = self.config
|
|
386
|
+
assert isinstance(gptq_conf, GPTQConfig)
|
|
387
|
+
# TODO reduce code duplication with layer-wise quantization
|
|
388
|
+
|
|
389
|
+
# prepare data for lm_head
|
|
390
|
+
batch_num = self.num_batches
|
|
391
|
+
device = next(model.parameters()).device
|
|
392
|
+
for batch_idx in tqdm(
|
|
393
|
+
range(batch_num),
|
|
394
|
+
desc=f"[model.norm] re-forward",
|
|
395
|
+
leave=False,
|
|
396
|
+
unit="batch",
|
|
397
|
+
disable=not gptq_conf.show_progress,
|
|
398
|
+
):
|
|
399
|
+
hidden_states = gather_single_batch_from_list(self.cache_args, batch_idx)[0]
|
|
400
|
+
hidden_states = move_to_device(hidden_states, device)
|
|
401
|
+
|
|
402
|
+
hidden_states = model.model.norm(hidden_states)
|
|
403
|
+
if len(self.cache_args) > 0:
|
|
404
|
+
self.cache_args[0][batch_idx] = move_to_cpu(hidden_states)
|
|
405
|
+
|
|
406
|
+
layer = model.lm_head
|
|
407
|
+
gptq = GPTQ(layer)
|
|
408
|
+
full_module_name = "lm_head"
|
|
409
|
+
weight_bits = self._resolve_weight_bits(
|
|
410
|
+
gptq_conf,
|
|
411
|
+
full_module_name=full_module_name,
|
|
412
|
+
local_module_name="lm_head",
|
|
413
|
+
)
|
|
414
|
+
if (
|
|
415
|
+
gptq_conf.sensitivity is not None
|
|
416
|
+
and isinstance(gptq_conf.sensitivity, dict)
|
|
417
|
+
and full_module_name in gptq_conf.sensitivity
|
|
418
|
+
):
|
|
419
|
+
cur_sensitivity = gptq_conf.sensitivity[full_module_name]
|
|
420
|
+
else:
|
|
421
|
+
cur_sensitivity = None
|
|
422
|
+
gptq.quantizer.configure(
|
|
423
|
+
bits=weight_bits,
|
|
424
|
+
perchannel=gptq_conf.perchannel,
|
|
425
|
+
sym=gptq_conf.symmetric,
|
|
426
|
+
mse=gptq_conf.mse,
|
|
427
|
+
sensitivity=cur_sensitivity,
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
# Hook to collect (inp, out) for GPTQ
|
|
431
|
+
def add_batch():
|
|
432
|
+
def _hook(_, inp, out):
|
|
433
|
+
gptq.add_batch(inp[0].data, out.data)
|
|
434
|
+
|
|
435
|
+
return _hook
|
|
436
|
+
|
|
437
|
+
handles = [layer.register_forward_hook(add_batch())]
|
|
438
|
+
|
|
439
|
+
# Run layer forward over all cached batches to build Hessian/statistics
|
|
440
|
+
device = next(layer.parameters()).device # in case lm_head is located on cpu
|
|
441
|
+
for batch_idx in tqdm(
|
|
442
|
+
range(batch_num),
|
|
443
|
+
desc=f"[lm_head] collecting",
|
|
444
|
+
leave=False,
|
|
445
|
+
unit="batch",
|
|
446
|
+
disable=not gptq_conf.show_progress,
|
|
447
|
+
):
|
|
448
|
+
hidden_states = gather_single_batch_from_list(self.cache_args, batch_idx)[0]
|
|
449
|
+
hidden_states = move_to_device(hidden_states, device)
|
|
450
|
+
|
|
451
|
+
layer(hidden_states)
|
|
452
|
+
|
|
453
|
+
# Remove handles
|
|
454
|
+
for h in handles:
|
|
455
|
+
h.remove()
|
|
456
|
+
|
|
457
|
+
# Quantize
|
|
458
|
+
if gptq_conf.verbose:
|
|
459
|
+
print(f"[lm_head] -> Quantizing ...")
|
|
460
|
+
gptq.fasterquant(
|
|
461
|
+
percdamp=gptq_conf.percdamp,
|
|
462
|
+
groupsize=gptq_conf.groupsize,
|
|
463
|
+
actorder=gptq_conf.actorder,
|
|
464
|
+
static_groups=gptq_conf.static_groups,
|
|
465
|
+
verbose=gptq_conf.verbose,
|
|
466
|
+
)
|
|
467
|
+
quantizers[f"lm_head"] = gptq.quantizer
|
|
468
|
+
gptq.free()
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import copy
|
|
17
|
+
import sys
|
|
18
|
+
|
|
19
|
+
import torch
|
|
20
|
+
import torch.nn as nn
|
|
21
|
+
|
|
22
|
+
import tico
|
|
23
|
+
import tico.quantization
|
|
24
|
+
import tico.quantization.config.ptq
|
|
25
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
26
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
torch.manual_seed(123)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_calibration_data(
|
|
33
|
+
num_batches: int,
|
|
34
|
+
batch_size: int,
|
|
35
|
+
normalized_shape: tuple,
|
|
36
|
+
) -> list:
|
|
37
|
+
"""Generate calibration data for PTQ"""
|
|
38
|
+
calibration_data = []
|
|
39
|
+
for i in range(num_batches):
|
|
40
|
+
x = torch.randn(batch_size, *normalized_shape)
|
|
41
|
+
calibration_data.append(x)
|
|
42
|
+
return calibration_data
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def main():
|
|
46
|
+
# Create LayerNorm model
|
|
47
|
+
# Using a common configuration for transformer models
|
|
48
|
+
normalized_shape = (768,) # Hidden dimension size
|
|
49
|
+
model = nn.LayerNorm(
|
|
50
|
+
normalized_shape=normalized_shape,
|
|
51
|
+
eps=1e-5,
|
|
52
|
+
elementwise_affine=True,
|
|
53
|
+
)
|
|
54
|
+
orig_model = copy.deepcopy(model)
|
|
55
|
+
model.eval()
|
|
56
|
+
|
|
57
|
+
# Generate calibration data
|
|
58
|
+
# Input shape: (batch_size, *normalized_shape)
|
|
59
|
+
# Example: (10, 768) - 10 samples, 768 features
|
|
60
|
+
batch_size = 10
|
|
61
|
+
calibration_data = generate_calibration_data(
|
|
62
|
+
num_batches=5,
|
|
63
|
+
batch_size=batch_size,
|
|
64
|
+
normalized_shape=normalized_shape,
|
|
65
|
+
)
|
|
66
|
+
example_input = calibration_data[0]
|
|
67
|
+
|
|
68
|
+
# Configure PTQ
|
|
69
|
+
ptq_config = tico.quantization.config.ptq.PTQConfig()
|
|
70
|
+
|
|
71
|
+
# Prepare the model for quantization
|
|
72
|
+
prepared_model = tico.quantization.prepare(
|
|
73
|
+
model, ptq_config, inplace=True # Transform the model in place
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Calibrate the model (collect statistics)
|
|
77
|
+
with torch.no_grad():
|
|
78
|
+
for i, batch in enumerate(calibration_data):
|
|
79
|
+
prepared_model(batch)
|
|
80
|
+
|
|
81
|
+
# Convert to quantized model
|
|
82
|
+
quantized_model = tico.quantization.convert(prepared_model, inplace=True)
|
|
83
|
+
|
|
84
|
+
# Compute PEIR (Peak Error-to-Interval Ratio) between quantized model and original model
|
|
85
|
+
with torch.no_grad():
|
|
86
|
+
quant_out = quantized_model(example_input)
|
|
87
|
+
fp_out = orig_model(example_input)
|
|
88
|
+
|
|
89
|
+
print(f"Input shape: {example_input.shape}")
|
|
90
|
+
print(f"Output shape (FP32): {fp_out.shape}")
|
|
91
|
+
print(f"Output shape (Quantized): {quant_out.shape}")
|
|
92
|
+
print(f"┌───────────── Quantization Error Summary ─────────────")
|
|
93
|
+
print(f"│ Mean |diff|: {(quant_out - fp_out).abs().mean().item():.6f}")
|
|
94
|
+
print(f"│ PEIR : {compute_peir(fp_out, quant_out) * 100:.6f} %")
|
|
95
|
+
print(f"└──────────────────────────────────────────────────────")
|
|
96
|
+
print(plot_two_outputs(fp_out, quant_out))
|
|
97
|
+
|
|
98
|
+
# Convert to Circle format
|
|
99
|
+
circle_model = tico.convert(quantized_model.eval(), (example_input,))
|
|
100
|
+
|
|
101
|
+
# Save the Circle model
|
|
102
|
+
filename = "quantized_layernorm.circle"
|
|
103
|
+
circle_model.save(filename)
|
|
104
|
+
print(f"Circle model saved as '{filename}'")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
if __name__ == "__main__":
|
|
108
|
+
main()
|
|
@@ -72,26 +72,59 @@ def inject_gptq_qparams(
|
|
|
72
72
|
root: torch.nn.Module,
|
|
73
73
|
gptq_quantizers: dict[str, Any], # {fp_name: quantizer}
|
|
74
74
|
weight_obs_name: str = "weight",
|
|
75
|
+
*,
|
|
76
|
+
verbose: bool = False,
|
|
75
77
|
):
|
|
76
78
|
"""
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
Inject GPTQ (scale, zero-point) into PTQ observers.
|
|
80
|
+
|
|
81
|
+
When verbose=True, prints a summary of matched / missed / unused entries.
|
|
80
82
|
"""
|
|
83
|
+
seen = set()
|
|
84
|
+
missed_modules = []
|
|
85
|
+
|
|
81
86
|
for m in root.modules():
|
|
82
87
|
if not isinstance(m, QuantModuleBase):
|
|
83
88
|
continue
|
|
84
89
|
if m.fp_name is None:
|
|
85
90
|
continue
|
|
91
|
+
|
|
86
92
|
quantizer = gptq_quantizers.get(m.fp_name)
|
|
87
|
-
if quantizer is None:
|
|
88
|
-
continue
|
|
89
93
|
obs = m.get_observer(weight_obs_name)
|
|
94
|
+
|
|
95
|
+
# Only care about modules that should have weight observers
|
|
90
96
|
if obs is None:
|
|
91
97
|
continue
|
|
98
|
+
|
|
99
|
+
if quantizer is None:
|
|
100
|
+
missed_modules.append(m.fp_name)
|
|
101
|
+
continue
|
|
102
|
+
|
|
92
103
|
assert isinstance(obs, AffineObserverBase)
|
|
93
|
-
# GPTQ quantizer attributes
|
|
94
104
|
obs.load_qparams(quantizer.scale, quantizer.zero, lock=True)
|
|
105
|
+
seen.add(m.fp_name)
|
|
106
|
+
|
|
107
|
+
unused = set(gptq_quantizers.keys()) - seen
|
|
108
|
+
|
|
109
|
+
if verbose:
|
|
110
|
+
print("\n[GPTQ → PTQ injection summary]")
|
|
111
|
+
print(f" matched : {len(seen)}")
|
|
112
|
+
print(f" missed : {len(missed_modules)}")
|
|
113
|
+
print(f" unused : {len(unused)}")
|
|
114
|
+
|
|
115
|
+
# Print samples (not all, to avoid spam)
|
|
116
|
+
def _print_sample(title, items):
|
|
117
|
+
items = list(items)
|
|
118
|
+
if not items:
|
|
119
|
+
return
|
|
120
|
+
print(f"\n {title}:")
|
|
121
|
+
for name in items[:10]:
|
|
122
|
+
print(f" - {name}")
|
|
123
|
+
if len(items) > 10:
|
|
124
|
+
print(f" ... and {len(items) - 10} more")
|
|
125
|
+
|
|
126
|
+
_print_sample("missed modules", missed_modules)
|
|
127
|
+
_print_sample("unused GPTQ entries", unused)
|
|
95
128
|
|
|
96
129
|
|
|
97
130
|
# -------------------------------------------------------------------------
|
|
@@ -172,13 +205,13 @@ def quantize_using_PTQ(q_m, calib_inputs, args):
|
|
|
172
205
|
|
|
173
206
|
# Overwrite weight observers with GPTQ statistics
|
|
174
207
|
if hasattr(q_m, "quantizers") and isinstance(q_m.quantizers, dict):
|
|
175
|
-
inject_gptq_qparams(q_m, q_m.quantizers)
|
|
208
|
+
inject_gptq_qparams(q_m, q_m.quantizers, verbose=args.verbose)
|
|
176
209
|
elif (
|
|
177
210
|
hasattr(q_m, "wrapped")
|
|
178
211
|
and hasattr(q_m.wrapped, "quantizers")
|
|
179
212
|
and isinstance(q_m.wrapped.quantizers, dict)
|
|
180
213
|
):
|
|
181
|
-
inject_gptq_qparams(q_m.wrapped, q_m.wrapped.quantizers)
|
|
214
|
+
inject_gptq_qparams(q_m.wrapped, q_m.wrapped.quantizers, verbose=args.verbose)
|
|
182
215
|
else:
|
|
183
216
|
print(
|
|
184
217
|
"[Warn] q_m.quantizers not found or not a dict; skipping GPTQ qparam injection."
|
|
@@ -376,6 +409,11 @@ def main():
|
|
|
376
409
|
type=str,
|
|
377
410
|
default=None,
|
|
378
411
|
)
|
|
412
|
+
parser.add_argument(
|
|
413
|
+
"--verbose",
|
|
414
|
+
action="store_true",
|
|
415
|
+
help="Verbose logging for debugging (e.g., GPTQ injection coverage)",
|
|
416
|
+
)
|
|
379
417
|
args = parser.parse_args()
|
|
380
418
|
print(args)
|
|
381
419
|
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/observers/affine_base.py
RENAMED
|
@@ -106,26 +106,22 @@ class AffineObserverBase(ObserverBase):
|
|
|
106
106
|
self._cached_scale, self._cached_zp = scale, zp
|
|
107
107
|
return scale, zp
|
|
108
108
|
|
|
109
|
-
if self.channel_axis is None:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
zp = torch.zeros_like(C, dtype=torch.int)
|
|
118
|
-
else:
|
|
119
|
-
scale = torch.clamp(C.abs(), min=eps)
|
|
120
|
-
zp = torch.full_like(C, qmax, dtype=torch.int)
|
|
109
|
+
if (self.channel_axis is None) and torch.all(rng.abs() < 1e-8):
|
|
110
|
+
C = self.min_val
|
|
111
|
+
if torch.allclose(C, torch.zeros_like(C)):
|
|
112
|
+
scale = torch.ones_like(C)
|
|
113
|
+
zp = torch.zeros_like(C, dtype=torch.int)
|
|
114
|
+
elif (C > 0).all():
|
|
115
|
+
scale = torch.clamp(C, min=eps)
|
|
116
|
+
zp = torch.zeros_like(C, dtype=torch.int)
|
|
121
117
|
else:
|
|
122
|
-
scale = torch.clamp(rng, min=eps) / (qmax - qmin)
|
|
123
|
-
zp = (
|
|
124
|
-
torch.round(qmin - self.min_val / scale)
|
|
125
|
-
.clamp(qmin, qmax)
|
|
126
|
-
.to(torch.int)
|
|
127
|
-
)
|
|
118
|
+
scale = torch.clamp(C.abs(), min=eps)
|
|
119
|
+
zp = torch.full_like(C, qmax, dtype=torch.int)
|
|
128
120
|
else:
|
|
121
|
+
# Force the range to include 0
|
|
122
|
+
rng = torch.where(0 < self.min_val, self.max_val, rng)
|
|
123
|
+
rng = torch.where(0 > self.max_val, -self.min_val, rng)
|
|
124
|
+
|
|
129
125
|
scale = torch.clamp(rng, min=eps) / (qmax - qmin)
|
|
130
126
|
zp = (
|
|
131
127
|
torch.round(qmin - self.min_val / scale).clamp(qmin, qmax).to(torch.int)
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/wrapq/wrappers/llama/quant_model.py
RENAMED
|
@@ -78,7 +78,7 @@ class QuantLlamaModel(QuantModuleBase):
|
|
|
78
78
|
|
|
79
79
|
new_list = nn.ModuleList()
|
|
80
80
|
for idx, layer in enumerate(model_fp.layers):
|
|
81
|
-
child_scope = f"{idx}"
|
|
81
|
+
child_scope = f"{fp_name}.layers.{idx}"
|
|
82
82
|
child_cfg = layers_cfg.child(child_scope) if layers_cfg is not None else None # type: ignore[union-attr]
|
|
83
83
|
new_list.append(
|
|
84
84
|
PTQWrapper(
|
|
@@ -54,12 +54,10 @@ class QuantLlamaForCausalLM(QuantModuleBase):
|
|
|
54
54
|
model_fp.lm_head, torch.nn.Module
|
|
55
55
|
)
|
|
56
56
|
|
|
57
|
-
self.model = PTQWrapper(
|
|
58
|
-
model_fp.model, qcfg=model_cfg, fp_name=f"{fp_name}.model"
|
|
59
|
-
)
|
|
57
|
+
self.model = PTQWrapper(model_fp.model, qcfg=model_cfg, fp_name=f"model")
|
|
60
58
|
|
|
61
59
|
self.lm_head = PTQWrapper(
|
|
62
|
-
model_fp.lm_head, qcfg=lm_head_cfg, fp_name=f"{fp_name}.lm_head"
|
|
60
|
+
model_fp.lm_head, qcfg=lm_head_cfg, fp_name=f"lm_head"
|
|
63
61
|
)
|
|
64
62
|
|
|
65
63
|
# `rotate_lm_head` exists only for SpinQuant-style custom models.
|
|
@@ -139,6 +139,7 @@ tico/quantization/wrapq/examples/llama/quantize_mlp.py
|
|
|
139
139
|
tico/quantization/wrapq/examples/nn/__init__.py
|
|
140
140
|
tico/quantization/wrapq/examples/nn/quantize_conv3d.py
|
|
141
141
|
tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py
|
|
142
|
+
tico/quantization/wrapq/examples/nn/quantize_layernorm.py
|
|
142
143
|
tico/quantization/wrapq/examples/nn/quantize_linear.py
|
|
143
144
|
tico/quantization/wrapq/examples/qwen/__init__.py
|
|
144
145
|
tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.2.0.dev260415"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/convert_sym_size_to_circle_shape.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/decompose_fake_quantize_tensor_qparams.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/legalize_predefined_layout_operators.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/passes/lower_to_resize_nearest_neighbor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/fpi_gptq/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/observer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/smoothquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260415 → tico-0.2.0.dev260417}/tico/quantization/algorithm/spinquant/spin_llama.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|