tico 0.2.0.dev260507__tar.gz → 0.2.0.dev260511__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/PKG-INFO +1 -1
- tico-0.2.0.dev260511/tico/_version.py +1 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/smoothquant/smooth_quant.py +104 -4
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +37 -4
- tico-0.2.0.dev260511/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +1251 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +15 -12
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/PKG-INFO +1 -1
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/SOURCES.txt +2 -2
- tico-0.2.0.dev260507/tico/_version.py +0 -1
- tico-0.2.0.dev260507/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -681
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/LICENSE +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/README.md +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/pyproject.toml +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/setup.cfg +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/eliminate_rank_round_trip_region.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/cle/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/cle/cle.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/cle/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/builders.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/cle.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/mmlu_eval_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/quantize_bias.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
- /tico-0.2.0.dev260507/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py → /tico-0.2.0.dev260511/tico/quantization/wrapq/examples/llama/quantize_attention_decode.py +0 -0
- /tico-0.2.0.dev260507/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py → /tico-0.2.0.dev260511/tico/quantization/wrapq/examples/llama/quantize_attention_prefill.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_text_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/affine_base.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/adapters/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/compat/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/convert.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/mx/__init__.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/utils.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico/utils/version.py +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260507 → tico-0.2.0.dev260511}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260511"
|
|
@@ -72,9 +72,22 @@ def smooth_weights(
|
|
|
72
72
|
elif isinstance(front_module, torch.nn.LayerNorm):
|
|
73
73
|
front_numel = front_module.weight.numel()
|
|
74
74
|
else:
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
75
|
+
# Try Qwen3VLTextRMSNorm
|
|
76
|
+
try:
|
|
77
|
+
from transformers.models.qwen3_vl.modeling_qwen3_vl import (
|
|
78
|
+
Qwen3VLTextRMSNorm,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
if isinstance(front_module, Qwen3VLTextRMSNorm):
|
|
82
|
+
front_numel = front_module.weight.numel()
|
|
83
|
+
else:
|
|
84
|
+
raise NotImplementedError(
|
|
85
|
+
f"Unsupported module type: {type(front_module).__name__}"
|
|
86
|
+
)
|
|
87
|
+
except ImportError:
|
|
88
|
+
raise NotImplementedError(
|
|
89
|
+
f"Unsupported module type: {type(front_module).__name__}"
|
|
90
|
+
)
|
|
78
91
|
for back_m in back_modules:
|
|
79
92
|
if isinstance(back_m, torch.nn.Linear):
|
|
80
93
|
back_numel = back_m.in_features
|
|
@@ -282,6 +295,78 @@ def _apply_if_fairseq_relu_bridge(
|
|
|
282
295
|
return True
|
|
283
296
|
|
|
284
297
|
|
|
298
|
+
# ────────────────────────────────────────────────────────────
|
|
299
|
+
# Qwen3-VL Text Model Components (RMSNorm-based)
|
|
300
|
+
# ────────────────────────────────────────────────────────────
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
@torch.no_grad()
|
|
304
|
+
def _apply_if_qwen3vl_text_decoder(
|
|
305
|
+
name: str,
|
|
306
|
+
module: torch.nn.Module,
|
|
307
|
+
activation_max: Dict[str, torch.Tensor],
|
|
308
|
+
alpha_to_apply: float,
|
|
309
|
+
) -> bool:
|
|
310
|
+
"""
|
|
311
|
+
Apply SmoothQuant smoothing to Qwen3VLTextDecoderLayer (RMSNorm-based).
|
|
312
|
+
|
|
313
|
+
Qwen3VLTextDecoderLayer structure:
|
|
314
|
+
- input_layernorm (RMSNorm) → self_attn (q_proj, k_proj, v_proj)
|
|
315
|
+
- post_attention_layernorm (RMSNorm) → mlp (gate_proj, up_proj)
|
|
316
|
+
|
|
317
|
+
Returns True if this handler applied smoothing to `module`.
|
|
318
|
+
"""
|
|
319
|
+
try:
|
|
320
|
+
from transformers.models.qwen3_vl.modeling_qwen3_vl import (
|
|
321
|
+
Qwen3VLTextDecoderLayer,
|
|
322
|
+
)
|
|
323
|
+
except Exception:
|
|
324
|
+
return False
|
|
325
|
+
|
|
326
|
+
if not isinstance(module, Qwen3VLTextDecoderLayer):
|
|
327
|
+
return False
|
|
328
|
+
|
|
329
|
+
# Check for required attributes
|
|
330
|
+
if not hasattr(module, "input_layernorm") or not hasattr(
|
|
331
|
+
module, "post_attention_layernorm"
|
|
332
|
+
):
|
|
333
|
+
return False
|
|
334
|
+
if not hasattr(module, "self_attn") or not hasattr(module, "mlp"):
|
|
335
|
+
return False
|
|
336
|
+
|
|
337
|
+
# Smooth input_layernorm → q_proj, k_proj, v_proj
|
|
338
|
+
attn_ln = module.input_layernorm
|
|
339
|
+
qkv = [
|
|
340
|
+
module.self_attn.q_proj,
|
|
341
|
+
module.self_attn.k_proj,
|
|
342
|
+
module.self_attn.v_proj,
|
|
343
|
+
]
|
|
344
|
+
# Input-hook stats for q_proj input
|
|
345
|
+
qkv_input_scales = activation_max.get(name + ".self_attn.q_proj")
|
|
346
|
+
if qkv_input_scales is not None:
|
|
347
|
+
smooth_weights(attn_ln, qkv, qkv_input_scales, alpha_to_apply)
|
|
348
|
+
else:
|
|
349
|
+
print(
|
|
350
|
+
f"[SmoothQuant] Warning: activation stats not found for "
|
|
351
|
+
f"{name} self_attn.q_proj input."
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
# Smooth post_attention_layernorm → gate_proj, up_proj
|
|
355
|
+
ffn_ln = module.post_attention_layernorm
|
|
356
|
+
fcs = [module.mlp.gate_proj, module.mlp.up_proj]
|
|
357
|
+
# Input-hook stats for gate_proj input
|
|
358
|
+
fcs_input_scales = activation_max.get(name + ".mlp.gate_proj")
|
|
359
|
+
if fcs_input_scales is not None:
|
|
360
|
+
smooth_weights(ffn_ln, fcs, fcs_input_scales, alpha_to_apply)
|
|
361
|
+
else:
|
|
362
|
+
print(
|
|
363
|
+
f"[SmoothQuant] Warning: activation stats not found for "
|
|
364
|
+
f"{name} mlp.gate_proj input."
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
return True
|
|
368
|
+
|
|
369
|
+
|
|
285
370
|
# ────────────────────────────────────────────────────────────
|
|
286
371
|
# Qwen3-VL Vision Components (LayerNorm-based)
|
|
287
372
|
# ────────────────────────────────────────────────────────────
|
|
@@ -467,6 +552,7 @@ _APPLIERS: List[
|
|
|
467
552
|
Callable[[str, torch.nn.Module, Dict[str, torch.Tensor], float], bool]
|
|
468
553
|
] = [
|
|
469
554
|
_apply_if_llama_decoder,
|
|
555
|
+
_apply_if_qwen3vl_text_decoder,
|
|
470
556
|
_apply_if_qwen3vl_vision_block,
|
|
471
557
|
_apply_if_qwen3vl_vision_patch_merger,
|
|
472
558
|
_apply_if_fairseq_relu_bridge,
|
|
@@ -479,6 +565,7 @@ def apply_smoothing(
|
|
|
479
565
|
activation_max: Dict[str, torch.Tensor],
|
|
480
566
|
alpha: float = 0.5,
|
|
481
567
|
custom_alpha_map: Optional[Dict[str, float]] = None,
|
|
568
|
+
exclude_appliers: Optional[List[str]] = None,
|
|
482
569
|
):
|
|
483
570
|
"""
|
|
484
571
|
Applies SmoothQuant-style smoothing to the model's weights using activation maximum values.
|
|
@@ -495,7 +582,20 @@ def apply_smoothing(
|
|
|
495
582
|
A dictionary mapping layer/module names to custom alpha values.
|
|
496
583
|
Layers specified in this dictionary will use the corresponding alpha
|
|
497
584
|
value instead of the default.
|
|
585
|
+
exclude_appliers
|
|
586
|
+
A list of applier function names to exclude from processing.
|
|
587
|
+
Valid names: '_apply_if_llama_decoder', '_apply_if_qwen3vl_text_decoder',
|
|
588
|
+
'_apply_if_qwen3vl_vision_block', '_apply_if_qwen3vl_vision_patch_merger',
|
|
589
|
+
'_apply_if_fairseq_relu_bridge'.
|
|
498
590
|
"""
|
|
591
|
+
# Build list of appliers to use (excluding specified ones)
|
|
592
|
+
if exclude_appliers is None:
|
|
593
|
+
appliers_to_use = _APPLIERS
|
|
594
|
+
else:
|
|
595
|
+
appliers_to_use = [
|
|
596
|
+
applier for applier in _APPLIERS if applier.__name__ not in exclude_appliers
|
|
597
|
+
]
|
|
598
|
+
|
|
499
599
|
for name, module in model.named_modules():
|
|
500
600
|
alpha_to_apply = (
|
|
501
601
|
custom_alpha_map.get(name, alpha) if custom_alpha_map else alpha
|
|
@@ -506,6 +606,6 @@ def apply_smoothing(
|
|
|
506
606
|
)
|
|
507
607
|
|
|
508
608
|
# Try each applier until one succeeds.
|
|
509
|
-
for applier in
|
|
609
|
+
for applier in appliers_to_use:
|
|
510
610
|
if applier(name, module, activation_max, alpha_to_apply):
|
|
511
611
|
break # applied → stop trying others
|
|
@@ -277,6 +277,12 @@ def parse_args():
|
|
|
277
277
|
help="SmoothQuant alpha for vision components (Qwen3VLVisionBlock, Qwen3VLVisionPatchMerger). "
|
|
278
278
|
"Range: 0.0-1.0. Higher = more weight smoothing.",
|
|
279
279
|
)
|
|
280
|
+
parser.add_argument(
|
|
281
|
+
"--smoothquant_components",
|
|
282
|
+
choices=["vision", "text", "both"],
|
|
283
|
+
default=None,
|
|
284
|
+
help="Target components for SmoothQuant.",
|
|
285
|
+
)
|
|
280
286
|
parser.add_argument(
|
|
281
287
|
"--print_quantized_model",
|
|
282
288
|
action="store_true",
|
|
@@ -331,6 +337,13 @@ def parse_args():
|
|
|
331
337
|
help="Sliding window stride for perplexity calculation.",
|
|
332
338
|
)
|
|
333
339
|
|
|
340
|
+
parser.add_argument(
|
|
341
|
+
"--ppl_split",
|
|
342
|
+
type=str,
|
|
343
|
+
default="test",
|
|
344
|
+
help="Split for PPL evaluation",
|
|
345
|
+
)
|
|
346
|
+
|
|
334
347
|
return parser.parse_args()
|
|
335
348
|
|
|
336
349
|
|
|
@@ -829,7 +842,7 @@ def main() -> None:
|
|
|
829
842
|
# PPL evaluation on original model
|
|
830
843
|
if args.ppl_dataset:
|
|
831
844
|
print("\n=== PPL Evaluation (Original Model) ===")
|
|
832
|
-
ds_ppl, _ = get_dataset(args.ppl_dataset,
|
|
845
|
+
ds_ppl, _ = get_dataset(args.ppl_dataset, split=args.ppl_split, n=-1)
|
|
833
846
|
original_ppl = evaluate_ppl(
|
|
834
847
|
model=model,
|
|
835
848
|
tokenizer=processor.tokenizer,
|
|
@@ -849,10 +862,29 @@ def main() -> None:
|
|
|
849
862
|
)
|
|
850
863
|
|
|
851
864
|
# -------------------------------------------------------------------------
|
|
852
|
-
# Apply SmoothQuant transformation
|
|
865
|
+
# Apply SmoothQuant transformation
|
|
853
866
|
# -------------------------------------------------------------------------
|
|
854
867
|
if args.smoothquant:
|
|
855
|
-
|
|
868
|
+
if args.smoothquant_components is None:
|
|
869
|
+
raise ValueError(
|
|
870
|
+
"--smoothquant_components must be specified when "
|
|
871
|
+
"--smoothquant is enabled."
|
|
872
|
+
)
|
|
873
|
+
# Build exclude_appliers list based on arguments
|
|
874
|
+
exclude_appliers = []
|
|
875
|
+
if args.smoothquant_components == "text":
|
|
876
|
+
exclude_appliers.extend(
|
|
877
|
+
[
|
|
878
|
+
"_apply_if_qwen3vl_vision_block",
|
|
879
|
+
"_apply_if_qwen3vl_vision_patch_merger",
|
|
880
|
+
]
|
|
881
|
+
)
|
|
882
|
+
if args.smoothquant_components == "vision":
|
|
883
|
+
exclude_appliers.append("_apply_if_qwen3vl_text_decoder")
|
|
884
|
+
|
|
885
|
+
print(
|
|
886
|
+
f"Applying SmoothQuant smoothing for {args.smoothquant_components} components"
|
|
887
|
+
)
|
|
856
888
|
|
|
857
889
|
# Compute activation maximum values from calibration data
|
|
858
890
|
print("Computing activation maximum values for SmoothQuant …")
|
|
@@ -902,6 +934,7 @@ def main() -> None:
|
|
|
902
934
|
model,
|
|
903
935
|
activation_max,
|
|
904
936
|
alpha=args.smoothquant_alpha,
|
|
937
|
+
exclude_appliers=exclude_appliers if exclude_appliers else None,
|
|
905
938
|
)
|
|
906
939
|
print("SmoothQuant smoothing complete.")
|
|
907
940
|
|
|
@@ -1021,7 +1054,7 @@ def main() -> None:
|
|
|
1021
1054
|
# PPL evaluation on quantized model
|
|
1022
1055
|
if args.ppl_dataset:
|
|
1023
1056
|
print("\n=== PPL Evaluation (Quantized Model) ===")
|
|
1024
|
-
ds_ppl, _ = get_dataset(args.ppl_dataset,
|
|
1057
|
+
ds_ppl, _ = get_dataset(args.ppl_dataset, split=args.ppl_split, n=-1)
|
|
1025
1058
|
quantized_ppl = evaluate_ppl(
|
|
1026
1059
|
model=q_m,
|
|
1027
1060
|
tokenizer=processor.tokenizer,
|