tico 0.2.0.dev260414__tar.gz → 0.2.0.dev260415__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/PKG-INFO +1 -1
- tico-0.2.0.dev260415/tico/_version.py +1 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/quantizer.py +25 -3
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +3 -3
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/utils.py +21 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/PKG-INFO +1 -1
- tico-0.2.0.dev260414/tico/_version.py +0 -1
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/LICENSE +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/README.md +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/pyproject.toml +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/setup.cfg +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/builders.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/quantize_bias.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/affine_base.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/compat/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/convert.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/mx/__init__.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/SOURCES.txt +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260415"
|
|
@@ -28,6 +28,11 @@ from tico.quantization.algorithm.gptq.utils import (
|
|
|
28
28
|
from tico.quantization.config.gptq import GPTQConfig
|
|
29
29
|
from tico.quantization.quantizer import BaseQuantizer
|
|
30
30
|
from tico.quantization.quantizer_registry import register_quantizer
|
|
31
|
+
from tico.utils.utils import move_to_device
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def move_to_cpu(obj):
    """
    Return `obj` with its tensors placed on the CPU.

    Thin convenience wrapper that delegates to `move_to_device` with the
    target device fixed to "cpu"; whatever `move_to_device` returns is
    passed back unchanged.
    """
    on_cpu = move_to_device(obj, "cpu")
    return on_cpu
|
|
31
36
|
|
|
32
37
|
|
|
33
38
|
class StopForward(Exception):
|
|
@@ -118,12 +123,12 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
118
123
|
for idx, item in enumerate(args):
|
|
119
124
|
if (idx + 1) > len(self.cache_args):
|
|
120
125
|
self.cache_args.append([])
|
|
121
|
-
self.cache_args[idx].append(item)
|
|
126
|
+
self.cache_args[idx].append(move_to_cpu(item))
|
|
122
127
|
# Store keyword args
|
|
123
128
|
for k, v in kwargs.items():
|
|
124
129
|
if self.cache_kwargs.get(k, None) is None:
|
|
125
130
|
self.cache_kwargs[k] = []
|
|
126
|
-
self.cache_kwargs[k].append(v)
|
|
131
|
+
self.cache_kwargs[k].append(move_to_cpu(v))
|
|
127
132
|
|
|
128
133
|
self.num_batches += 1
|
|
129
134
|
raise StopForward # stop after the first layer
|
|
@@ -280,6 +285,7 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
280
285
|
|
|
281
286
|
# Run layer forward over all cached batches to build Hessian/statistics
|
|
282
287
|
batch_num = self.num_batches
|
|
288
|
+
device = next(model.parameters()).device
|
|
283
289
|
for batch_idx in tqdm(
|
|
284
290
|
range(batch_num),
|
|
285
291
|
desc=f"[L{l_idx}] collecting",
|
|
@@ -290,9 +296,13 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
290
296
|
cache_args_batch = gather_single_batch_from_list(
|
|
291
297
|
self.cache_args, batch_idx
|
|
292
298
|
)
|
|
299
|
+
cache_args_batch = move_to_device(cache_args_batch, device)
|
|
300
|
+
|
|
293
301
|
cache_kwargs_batch = gather_single_batch_from_dict(
|
|
294
302
|
self.cache_kwargs, batch_idx
|
|
295
303
|
)
|
|
304
|
+
cache_kwargs_batch = move_to_device(cache_kwargs_batch, device)
|
|
305
|
+
|
|
296
306
|
layer(*cache_args_batch, **cache_kwargs_batch)
|
|
297
307
|
|
|
298
308
|
# Remove handles
|
|
@@ -314,6 +324,7 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
314
324
|
gptq[name].free()
|
|
315
325
|
|
|
316
326
|
# 4) After quantization, re-run the layer to produce outputs for the next layer
|
|
327
|
+
device = next(model.parameters()).device
|
|
317
328
|
for batch_idx in tqdm(
|
|
318
329
|
range(batch_num),
|
|
319
330
|
desc=f"[L{l_idx}] re-forward",
|
|
@@ -324,9 +335,13 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
324
335
|
cache_args_batch = gather_single_batch_from_list(
|
|
325
336
|
self.cache_args, batch_idx
|
|
326
337
|
)
|
|
338
|
+
cache_args_batch = move_to_device(cache_args_batch, device)
|
|
339
|
+
|
|
327
340
|
cache_kwargs_batch = gather_single_batch_from_dict(
|
|
328
341
|
self.cache_kwargs, batch_idx
|
|
329
342
|
)
|
|
343
|
+
cache_kwargs_batch = move_to_device(cache_kwargs_batch, device)
|
|
344
|
+
|
|
330
345
|
outs = layer(*cache_args_batch, **cache_kwargs_batch)
|
|
331
346
|
# LLaMA's decoder layer return type differs across Transformers versions:
|
|
332
347
|
# some return a tuple (hidden_states, ...), others return just a tensor.
|
|
@@ -334,7 +349,14 @@ class GPTQQuantizer(BaseQuantizer):
|
|
|
334
349
|
outs = outs[0] if isinstance(outs, tuple) else outs
|
|
335
350
|
# Update inputs for next iteration.
|
|
336
351
|
if len(self.cache_args) > 0:
|
|
337
|
-
|
|
352
|
+
if hasattr(outs, "to") and hasattr(
|
|
353
|
+
self.cache_args[0][batch_idx], "device"
|
|
354
|
+
):
|
|
355
|
+
self.cache_args[0][batch_idx] = outs.to(
|
|
356
|
+
self.cache_args[0][batch_idx].device
|
|
357
|
+
)
|
|
358
|
+
else:
|
|
359
|
+
self.cache_args[0][batch_idx] = outs
|
|
338
360
|
|
|
339
361
|
if torch.cuda.is_available():
|
|
340
362
|
torch.cuda.empty_cache()
|
|
@@ -313,7 +313,7 @@ def main():
|
|
|
313
313
|
"--save",
|
|
314
314
|
nargs="*",
|
|
315
315
|
type=str,
|
|
316
|
-
choices=["
|
|
316
|
+
choices=["circle_full", "circle_per_layer", "ptq_checkpoint", "sensitivity"],
|
|
317
317
|
help="which artifacts should be saved to output_dir",
|
|
318
318
|
)
|
|
319
319
|
parser.add_argument(
|
|
@@ -512,10 +512,10 @@ def main():
|
|
|
512
512
|
# after PTQ quantizer only fixed-length input sequences are valid
|
|
513
513
|
evaluate(q_m, tokenizer, dataset_test, args)
|
|
514
514
|
|
|
515
|
-
if args.output_dir is not None and "
|
|
515
|
+
if args.output_dir is not None and "circle_per_layer" in args.save:
|
|
516
516
|
save_layers_to(q_m, args.max_seq_len, args.output_dir)
|
|
517
517
|
|
|
518
|
-
if args.output_dir is not None and "
|
|
518
|
+
if args.output_dir is not None and "circle_full" in args.save:
|
|
519
519
|
calib_inputs = list(torch.stack(calib_inputs).reshape(-1, 1, args.max_seq_len))
|
|
520
520
|
save_model_to(q_m, calib_inputs, args.output_dir)
|
|
521
521
|
|
|
@@ -396,3 +396,24 @@ def is_target_node(
|
|
|
396
396
|
return False
|
|
397
397
|
|
|
398
398
|
return True
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def move_to_device(obj, device):
    """
    Recursively move tensors inside a nested structure to the given device.

    Tensors are moved with `Tensor.to(device)`. Tuples, lists and dicts are
    rebuilt with each element (or dict value) processed recursively; dict
    keys are kept as-is. Named tuples are rebuilt with their own type so
    that field access keeps working on the result. Non-tensor leaves are
    preserved as-is.

    Args:
        obj: A tensor, a (possibly nested) tuple/list/dict, or any other
            object.
        device: Target device; forwarded unchanged to `Tensor.to`.

    Returns:
        A structure mirroring `obj` with every tensor moved to `device`.
        Objects that are not tensors, tuples, lists or dicts are returned
        unchanged (same object).
    """
    if isinstance(obj, torch.Tensor):
        return obj.to(device)

    if isinstance(obj, tuple):
        moved = [move_to_device(x, device) for x in obj]
        # A namedtuple constructor takes its fields positionally; rebuilding
        # with plain `tuple(...)` would silently drop the field names.
        if hasattr(obj, "_fields"):
            return type(obj)(*moved)
        return tuple(moved)

    if isinstance(obj, list):
        return [move_to_device(x, device) for x in obj]

    if isinstance(obj, dict):
        return {k: move_to_device(v, device) for k, v in obj.items()}

    # preserve everything else (bool, int, None, custom objects, etc.)
    return obj
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.2.0.dev260414"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/convert_sym_size_to_circle_shape.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/decompose_fake_quantize_tensor_qparams.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/legalize_predefined_layout_operators.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/passes/lower_to_resize_nearest_neighbor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/observer.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/quantizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/spin_llama.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/llm_tasks_eval.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/mini_vqa_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/evaluation/vlm_eval_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_backward.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_forward.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/passes/remove_weight_dequant_op.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/compare_ppl.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_mlp.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/__init__.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py
RENAMED
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_linear.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_with_gptq.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/trace_qwen.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/affine_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/check_missing_qparam.py
RENAMED
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/introspection.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tico-0.2.0.dev260414 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|