tico 0.2.0.dev260520__tar.gz → 0.2.0.dev260522__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/PKG-INFO +1 -1
- tico-0.2.0.dev260522/tico/_version.py +1 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/mmmu_eval_utils.py +194 -75
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/vlm_eval_utils.py +79 -0
- tico-0.2.0.dev260522/tico/quantization/examples/evaluate.py +74 -0
- tico-0.2.0.dev260522/tico/quantization/examples/inspect.py +87 -0
- tico-0.2.0.dev260522/tico/quantization/examples/quantize.py +58 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/__init__.py +5 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/adapters/__init__.py +30 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/adapters/base.py +66 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/adapters/llama.py +310 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/adapters/qwen3_vl.py +464 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/config.py +207 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/context.py +41 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/data/llm.py +51 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/data/vlm.py +32 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/debug/static_llama_runtime.py +567 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/debug/tied_embedding.py +227 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/debug/trace.py +165 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/evaluation/hellaswag.py +46 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/evaluation/llm.py +53 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/evaluation/mmlu.py +45 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/evaluation/mmmu.py +51 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/evaluation/vlm.py +105 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/export/checkpoint.py +29 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/export/circle.py +68 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/export/llama.py +168 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/qparams.py +106 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/runner.py +65 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/__init__.py +23 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/base.py +26 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/cle.py +57 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/gptq.py +63 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/ptq.py +51 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/smoothquant.py +30 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/stages/spinquant.py +38 -0
- tico-0.2.0.dev260522/tico/quantization/recipes/utils.py +125 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +88 -2
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +17 -5
- tico-0.2.0.dev260522/tico/serialize/__init__.py +1 -0
- tico-0.2.0.dev260522/tico/serialize/operators/adapters/__init__.py +1 -0
- tico-0.2.0.dev260522/tico/serialize/operators/adapters/onert/__init__.py +1 -0
- tico-0.2.0.dev260522/tico/utils/__init__.py +1 -0
- tico-0.2.0.dev260522/tico/utils/compat/__init__.py +1 -0
- tico-0.2.0.dev260522/tico/utils/mx/__init__.py +1 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/PKG-INFO +1 -1
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/SOURCES.txt +39 -0
- tico-0.2.0.dev260520/tico/_version.py +0 -1
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/LICENSE +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/README.md +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/pyproject.toml +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/setup.cfg +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/config/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/config/base.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/config/factory.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/config/v1.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/experimental/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/interpreter/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/interpreter/infer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/interpreter/interpreter.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/cast_aten_where_arg_type.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/cast_mixed_type_args.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/const_prop_pass.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_expand_to_slice_cat.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_layout_op_to_reshape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_matmul_to_linear.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/convert_to_relu6.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_addmm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_batch_norm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_fake_quantize.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_group_norm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_grouped_conv2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/decompose_slice_scatter.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/eliminate_rank_round_trip_region.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/extract_dtype_kwargs.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/fill_meta_val.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/legalize_causal_mask_value.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/legalize_predefined_layout_operators.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/lower_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/lower_pow2_to_mul.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/lower_to_slice.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/merge_consecutive_cat.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/ops.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_nop.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_assert_nodes.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_expand.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_permute.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_reshape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_slice.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_redundant_to_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/remove_unused_placeholder.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/restore_linear.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/passes/segment_index_select.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/pt2_to_circle.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/cle/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/cle/cle.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/cle/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/gptq/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/gptq/gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/gptq/quant.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/gptq/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/qwen3_vl_model_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/qwen3_vl_quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/qwen3_vl_rotation_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/algorithm/spinquant/spin_qwen3_vl.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/base.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/builders.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/cle.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/fpi_gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/llama_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/ptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/qwen3_vl_spinquant.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/smoothquant.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/spinquant.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/config/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/backend.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/evaluate.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/executor/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/hellaswag_eval_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/metric.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/mmlu_eval_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/utils.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/passes → tico-0.2.0.dev260522/tico/quantization/examples}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq → tico-0.2.0.dev260522/tico/quantization/examples/configs}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/examples → tico-0.2.0.dev260522/tico/quantization/passes}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/fold_quant_ops.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/quantize_bias.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/public_interface.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/quantizer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/quantizer_registry.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/examples/llama → tico-0.2.0.dev260522/tico/quantization/recipes/data}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/examples/nn → tico-0.2.0.dev260522/tico/quantization/recipes/debug}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/examples/qwen → tico-0.2.0.dev260522/tico/quantization/recipes/evaluation}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/observers → tico-0.2.0.dev260522/tico/quantization/recipes/export}/__init__.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/utils → tico-0.2.0.dev260522/tico/quantization/wrapq}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/dtypes.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/wrappers → tico-0.2.0.dev260522/tico/quantization/wrapq/examples}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/wrappers → tico-0.2.0.dev260522/tico/quantization/wrapq/examples}/llama/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/llama/quantize_attention_decode.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/llama/quantize_attention_prefill.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/wrappers → tico-0.2.0.dev260522/tico/quantization/wrapq/examples}/nn/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/nn/quantize_tied_embedding.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
- {tico-0.2.0.dev260520/tico/quantization/wrapq/wrappers/ops → tico-0.2.0.dev260522/tico/quantization/wrapq/examples/qwen}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_text_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/mode.py +0 -0
- {tico-0.2.0.dev260520/tico/serialize → tico-0.2.0.dev260522/tico/quantization/wrapq/observers}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/affine_base.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/base.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/ema.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/identity.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/minmax.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/observers/mx.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/qscheme.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/quantizer.py +0 -0
- {tico-0.2.0.dev260520/tico/serialize/operators/adapters → tico-0.2.0.dev260522/tico/quantization/wrapq/utils}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/introspection.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/metrics.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/utils/version.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrap_helper.py +0 -0
- {tico-0.2.0.dev260520/tico/serialize/operators/adapters/onert → tico-0.2.0.dev260522/tico/quantization/wrapq/wrappers}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
- {tico-0.2.0.dev260520/tico/utils → tico-0.2.0.dev260522/tico/quantization/wrapq/wrappers/llama}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
- {tico-0.2.0.dev260520/tico/utils/compat → tico-0.2.0.dev260522/tico/quantization/wrapq/wrappers/nn}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
- {tico-0.2.0.dev260520/tico/utils/mx → tico-0.2.0.dev260522/tico/quantization/wrapq/wrappers/ops}/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/wrapq/wrappers/registry.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/circle_graph.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/circle_mapping.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/circle_serializer.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/__init__.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/hashable_opcode.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/node_visitor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_abs.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_add.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_alias_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_any.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_arange_start_step.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_argmax.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_attention.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_avg_pool2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_bmm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_cat.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_circle_shape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_clamp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_clone.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_conv2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_cos.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_cumsum.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_div.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_embedding.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_eq.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_exp.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_expand.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_full.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_full_like.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_ge.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_gelu.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_gt.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_index.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_index_select.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_instance_norm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_le.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_leaky_relu.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_linear.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_log.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_log1p.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_logical_and.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_logical_not.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_lt.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_max_dim.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_maximum.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_mean.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_minimum.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_mm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_mul.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_ne.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_neg.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_permute.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_pow.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_prelu.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_reciprocal.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_relu.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_relu6.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_repeat.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_reshape.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_rmsnorm.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_round.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_rsqrt.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_scalar_tensor.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_select_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_sigmoid.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_sin.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_slice.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_softmax.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_split_with_sizes.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_sqrt.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_squeeze.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_sub.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_sum.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_tanh.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_to_copy.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_transpose_conv.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_unsqueeze.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_view.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/op_where.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/operators/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/pack.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/serialize/quant_param.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/compat/torch.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/compat/transformers.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/convert.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/define.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/diff_graph.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/dtype.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/errors.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/graph.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/installed_packages.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/logging.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/model.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/mx/elemwise_ops.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/mx/formats.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/mx/mx_ops.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/padding.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/passes.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/pytree_utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/record_input.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/register_custom_op.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/serialize.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/signature.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/trace_decorators.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/utils.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/validate_args_kwargs.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/utils/version.py +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/dependency_links.txt +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/entry_points.txt +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/requires.txt +0 -0
- {tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.2.0.dev260522"
|
{tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/mmmu_eval_utils.py
RENAMED
|
@@ -19,49 +19,64 @@ from typing import Any, Iterable
|
|
|
19
19
|
import torch
|
|
20
20
|
from datasets import load_dataset
|
|
21
21
|
|
|
22
|
-
from tico.quantization.evaluation.vlm_eval_utils import
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
"
|
|
64
|
-
|
|
22
|
+
from tico.quantization.evaluation.vlm_eval_utils import (
|
|
23
|
+
generate_answer,
|
|
24
|
+
generate_image_only_answer,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
MMMU_DATASETS = ["MMMU/MMMU", "MMMU/MMMU_Pro"]
|
|
29
|
+
|
|
30
|
+
MMMU_SUBJECTS: dict[str, list[str]] = {
|
|
31
|
+
"MMMU/MMMU": [
|
|
32
|
+
"Accounting",
|
|
33
|
+
"Agriculture",
|
|
34
|
+
"Architecture_and_Engineering",
|
|
35
|
+
"Art",
|
|
36
|
+
"Art_Theory",
|
|
37
|
+
"Basic_Medical_Science",
|
|
38
|
+
"Biology",
|
|
39
|
+
"Chemistry",
|
|
40
|
+
"Clinical_Medicine",
|
|
41
|
+
"Computer_Science",
|
|
42
|
+
"Design",
|
|
43
|
+
"Diagnostics_and_Laboratory_Medicine",
|
|
44
|
+
"Economics",
|
|
45
|
+
"Electronics",
|
|
46
|
+
"Energy_and_Power",
|
|
47
|
+
"Finance",
|
|
48
|
+
"Geography",
|
|
49
|
+
"History",
|
|
50
|
+
"Literature",
|
|
51
|
+
"Manage",
|
|
52
|
+
"Marketing",
|
|
53
|
+
"Materials",
|
|
54
|
+
"Math",
|
|
55
|
+
"Mechanical_Engineering",
|
|
56
|
+
"Music",
|
|
57
|
+
"Pharmacy",
|
|
58
|
+
"Physics",
|
|
59
|
+
"Psychology",
|
|
60
|
+
"Public_Health",
|
|
61
|
+
"Sociology",
|
|
62
|
+
],
|
|
63
|
+
"MMMU/MMMU_Pro": [
|
|
64
|
+
"standard (10 options)",
|
|
65
|
+
"standard (4 options)",
|
|
66
|
+
"vision",
|
|
67
|
+
],
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
MMMU_SPLITS: dict[str, list[str]] = {
|
|
71
|
+
"MMMU/MMMU": [
|
|
72
|
+
"dev",
|
|
73
|
+
"validation",
|
|
74
|
+
"test",
|
|
75
|
+
],
|
|
76
|
+
"MMMU/MMMU_Pro": [
|
|
77
|
+
"test",
|
|
78
|
+
],
|
|
79
|
+
}
|
|
65
80
|
|
|
66
81
|
|
|
67
82
|
def take_from_dataset(ds, start: int, n: int) -> Iterable[dict[str, Any]]:
|
|
@@ -76,20 +91,25 @@ def take_from_dataset(ds, start: int, n: int) -> Iterable[dict[str, Any]]:
|
|
|
76
91
|
|
|
77
92
|
|
|
78
93
|
def load_data(
|
|
94
|
+
dataset: str,
|
|
79
95
|
subject: str,
|
|
80
|
-
split: str
|
|
96
|
+
split: str,
|
|
81
97
|
start: int = 0,
|
|
82
98
|
n_samples: int = -1,
|
|
83
99
|
streaming: bool = True,
|
|
84
100
|
) -> Iterable[dict[str, Any]]:
|
|
85
|
-
|
|
101
|
+
|
|
102
|
+
if dataset not in MMMU_DATASETS:
|
|
103
|
+
raise ValueError(f"Invalid dataset '{dataset}'")
|
|
104
|
+
|
|
105
|
+
if subject not in MMMU_SUBJECTS[dataset]:
|
|
86
106
|
raise ValueError(f"Invalid subject '{subject}'")
|
|
87
107
|
|
|
88
|
-
if split not in MMMU_SPLITS:
|
|
108
|
+
if split not in MMMU_SPLITS[dataset]:
|
|
89
109
|
raise ValueError(f"Invalid split '{split}'")
|
|
90
110
|
|
|
91
111
|
ds: Iterable[dict[str, Any]] = load_dataset(
|
|
92
|
-
path=
|
|
112
|
+
path=dataset,
|
|
93
113
|
name=subject,
|
|
94
114
|
split=split,
|
|
95
115
|
streaming=streaming,
|
|
@@ -109,8 +129,8 @@ def get_item_mmmu(ex: dict[str, Any]) -> dict[str, Any]:
|
|
|
109
129
|
|
|
110
130
|
return {
|
|
111
131
|
"id": ex["id"],
|
|
112
|
-
"image": ex["image_1"],
|
|
113
|
-
"question": ex["question"],
|
|
132
|
+
"image": ex["image_1"] if "image_1" in ex else ex["image"],
|
|
133
|
+
"question": ex["question"] if "question" in ex else "",
|
|
114
134
|
"choices": choices,
|
|
115
135
|
"answer": ex["answer"],
|
|
116
136
|
}
|
|
@@ -203,15 +223,30 @@ def extract_answer(generated_text: str) -> str | None:
|
|
|
203
223
|
"""
|
|
204
224
|
text = generated_text.strip()
|
|
205
225
|
|
|
206
|
-
# Look for
|
|
207
|
-
first_char_match = re.match(
|
|
226
|
+
# Look for a letter at the beginning, e.g. "A", "A.", "(A)", "A Answer".
|
|
227
|
+
first_char_match = re.match(
|
|
228
|
+
r"^\s*\(?([A-J])\)?(?:[.)\s]|$)",
|
|
229
|
+
text,
|
|
230
|
+
re.IGNORECASE,
|
|
231
|
+
)
|
|
208
232
|
if first_char_match:
|
|
209
233
|
return first_char_match.group(1).upper()
|
|
210
234
|
|
|
235
|
+
# Common verbose outputs, e.g. "The answer is C", "Answer: C", "Option C".
|
|
236
|
+
answer_match = re.search(
|
|
237
|
+
r"\b(?:answer|option|choice)\s*(?:is|:)?\s*\(?([A-J])\)?\b",
|
|
238
|
+
text,
|
|
239
|
+
re.IGNORECASE,
|
|
240
|
+
)
|
|
241
|
+
if answer_match:
|
|
242
|
+
return answer_match.group(1).upper()
|
|
243
|
+
|
|
211
244
|
return text
|
|
212
245
|
|
|
213
246
|
|
|
214
247
|
def load_few_shot_examples(
|
|
248
|
+
dataset: str,
|
|
249
|
+
split: str,
|
|
215
250
|
subject: str,
|
|
216
251
|
n_shots: int = 5,
|
|
217
252
|
) -> list[dict[str, Any]]:
|
|
@@ -219,6 +254,8 @@ def load_few_shot_examples(
|
|
|
219
254
|
Load few-shot examples for a given MMMU subject from the 'dev' split.
|
|
220
255
|
|
|
221
256
|
Args:
|
|
257
|
+
dataset: Dataset name.
|
|
258
|
+
split: Split name (e.g. 'train', 'test', 'validation').
|
|
222
259
|
subject: The subject name.
|
|
223
260
|
n_shots: Number of few-shot examples to load.
|
|
224
261
|
|
|
@@ -229,8 +266,10 @@ def load_few_shot_examples(
|
|
|
229
266
|
return []
|
|
230
267
|
|
|
231
268
|
ds = load_data(
|
|
269
|
+
dataset=dataset,
|
|
232
270
|
subject=subject,
|
|
233
|
-
split=
|
|
271
|
+
split=split,
|
|
272
|
+
start=0,
|
|
234
273
|
n_samples=n_shots,
|
|
235
274
|
streaming=True,
|
|
236
275
|
)
|
|
@@ -238,9 +277,16 @@ def load_few_shot_examples(
|
|
|
238
277
|
return [get_item_mmmu(ex) for ex in ds]
|
|
239
278
|
|
|
240
279
|
|
|
280
|
+
def is_mmmu_pro_vision(dataset: str, subject: str) -> bool:
|
|
281
|
+
return dataset == "MMMU/MMMU_Pro" and subject == "vision"
|
|
282
|
+
|
|
283
|
+
|
|
241
284
|
def evaluate_subject(
|
|
242
285
|
model,
|
|
243
286
|
processor,
|
|
287
|
+
dataset: str,
|
|
288
|
+
eval_split: str,
|
|
289
|
+
few_shot_split: str,
|
|
244
290
|
subject: str,
|
|
245
291
|
device: str | torch.device,
|
|
246
292
|
max_new_tokens: int,
|
|
@@ -255,7 +301,10 @@ def evaluate_subject(
|
|
|
255
301
|
|
|
256
302
|
Args:
|
|
257
303
|
model: Language model with generation capability.
|
|
258
|
-
|
|
304
|
+
processor: Matching processor for the model.
|
|
305
|
+
dataset: Dataset name.
|
|
306
|
+
eval_split: Split name for evaluation (e.g. 'train', 'test', 'validation').
|
|
307
|
+
few_shot_split: Split name for few-shot examples (e.g. 'train', 'test', 'validation').
|
|
259
308
|
subject: The MMMU subject to evaluate.
|
|
260
309
|
device: Device for inference.
|
|
261
310
|
n_shots: Number of few-shot examples.
|
|
@@ -267,11 +316,31 @@ def evaluate_subject(
|
|
|
267
316
|
Returns:
|
|
268
317
|
A tuple of (correct_count, total_count, skipped_count).
|
|
269
318
|
"""
|
|
270
|
-
|
|
319
|
+
vision_only = is_mmmu_pro_vision(dataset, subject)
|
|
320
|
+
if vision_only:
|
|
321
|
+
if n_shots > 0 and verbose:
|
|
322
|
+
print(
|
|
323
|
+
"\n[WARNING] MMMU-Pro vision subset is evaluated image-only; "
|
|
324
|
+
f"ignoring n_shots={n_shots}."
|
|
325
|
+
)
|
|
326
|
+
few_shot_examples: list[dict[str, Any]] = []
|
|
327
|
+
else:
|
|
328
|
+
few_shot_examples = load_few_shot_examples(
|
|
329
|
+
dataset=dataset, split=few_shot_split, subject=subject, n_shots=n_shots
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# If we take few-shot examples from the same split as evaluation examples,
|
|
333
|
+
# then exclude few-shot examples from the evaluation set by adjusting start argument to load_data.
|
|
334
|
+
if few_shot_examples and eval_split == few_shot_split:
|
|
335
|
+
start = n_shots
|
|
336
|
+
else:
|
|
337
|
+
start = 0
|
|
271
338
|
|
|
272
339
|
test_data = load_data(
|
|
340
|
+
dataset=dataset,
|
|
273
341
|
subject=subject,
|
|
274
|
-
split=
|
|
342
|
+
split=eval_split,
|
|
343
|
+
start=start,
|
|
275
344
|
n_samples=n_samples,
|
|
276
345
|
streaming=True,
|
|
277
346
|
)
|
|
@@ -283,7 +352,7 @@ def evaluate_subject(
|
|
|
283
352
|
ex: dict[str, Any]
|
|
284
353
|
for ex in test_data:
|
|
285
354
|
# Skip questions with multiple images
|
|
286
|
-
if ex["image_2"] is not None:
|
|
355
|
+
if "image_2" in ex and ex["image_2"] is not None:
|
|
287
356
|
skipped += 1
|
|
288
357
|
if verbose:
|
|
289
358
|
question: str = ex["question"]
|
|
@@ -292,23 +361,59 @@ def evaluate_subject(
|
|
|
292
361
|
|
|
293
362
|
item = get_item_mmmu(ex)
|
|
294
363
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
364
|
+
if vision_only:
|
|
365
|
+
prompt = "<image-only>"
|
|
366
|
+
else:
|
|
367
|
+
prompt = build_few_shot_prompt(
|
|
368
|
+
question=item["question"],
|
|
369
|
+
choices=item["choices"],
|
|
370
|
+
subject=subject,
|
|
371
|
+
few_shot_examples=few_shot_examples,
|
|
372
|
+
)
|
|
301
373
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
374
|
+
try:
|
|
375
|
+
if vision_only:
|
|
376
|
+
generated = generate_image_only_answer(
|
|
377
|
+
model=model,
|
|
378
|
+
processor=processor,
|
|
379
|
+
image=item["image"],
|
|
380
|
+
question="Answer the multiple-choice question shown in the image. Return only one letter from A to J.",
|
|
381
|
+
device=device,
|
|
382
|
+
max_new_tokens=max_new_tokens,
|
|
383
|
+
max_seq_len=max_seq_len,
|
|
384
|
+
temperature=temperature,
|
|
385
|
+
)
|
|
386
|
+
else:
|
|
387
|
+
generated = generate_answer(
|
|
388
|
+
model=model,
|
|
389
|
+
processor=processor,
|
|
390
|
+
question=prompt,
|
|
391
|
+
image=item["image"],
|
|
392
|
+
device=device,
|
|
393
|
+
max_new_tokens=max_new_tokens,
|
|
394
|
+
max_seq_len=max_seq_len,
|
|
395
|
+
temperature=temperature,
|
|
396
|
+
)
|
|
397
|
+
except ValueError as error:
|
|
398
|
+
if "Mismatch in `image` token count between text and `input_ids`." in str(
|
|
399
|
+
error
|
|
400
|
+
):
|
|
401
|
+
if verbose:
|
|
402
|
+
print(
|
|
403
|
+
f"\n[WARNING] prompt too long for the specified max_seq_len={max_seq_len}. Skipping."
|
|
404
|
+
)
|
|
405
|
+
print(f"Error: {error}")
|
|
406
|
+
print(f"Prompt: {prompt}")
|
|
407
|
+
skipped += 1
|
|
408
|
+
continue
|
|
409
|
+
else:
|
|
410
|
+
raise error
|
|
411
|
+
except RuntimeError as error:
|
|
412
|
+
if verbose:
|
|
413
|
+
print(f"[ERROR]: {error}")
|
|
414
|
+
print(f"Prompt: {prompt}")
|
|
415
|
+
skipped += 1
|
|
416
|
+
continue
|
|
312
417
|
|
|
313
418
|
predicted = extract_answer(generated)
|
|
314
419
|
gold = item["answer"].upper()
|
|
@@ -319,7 +424,10 @@ def evaluate_subject(
|
|
|
319
424
|
|
|
320
425
|
if verbose:
|
|
321
426
|
print(f"\n[Sample {total}] Subject: {subject}")
|
|
322
|
-
|
|
427
|
+
if vision_only:
|
|
428
|
+
print("Q: <embedded in image>")
|
|
429
|
+
else:
|
|
430
|
+
print(f"Q: {item['question'][:100]}...")
|
|
323
431
|
print(f"Choices: {item['choices']}")
|
|
324
432
|
print(
|
|
325
433
|
f"Generated: {generated}, Predicted: {predicted}, Gold: {gold}, Correct: {is_correct}"
|
|
@@ -331,6 +439,7 @@ def evaluate_subject(
|
|
|
331
439
|
def evaluate_mmmu(
|
|
332
440
|
model,
|
|
333
441
|
processor,
|
|
442
|
+
dataset: str,
|
|
334
443
|
subjects: list[str] | None = None,
|
|
335
444
|
device: str | torch.device = "cuda",
|
|
336
445
|
n_shots: int = 5,
|
|
@@ -345,7 +454,8 @@ def evaluate_mmmu(
|
|
|
345
454
|
|
|
346
455
|
Args:
|
|
347
456
|
model: Language model with generation capability.
|
|
348
|
-
|
|
457
|
+
processor: Matching processor for the model.
|
|
458
|
+
dataset: Dataset name.
|
|
349
459
|
subjects: List of subjects to evaluate. Use None for all subjects.
|
|
350
460
|
device: Device for inference.
|
|
351
461
|
n_shots: Number of few-shot examples per subject.
|
|
@@ -357,8 +467,14 @@ def evaluate_mmmu(
|
|
|
357
467
|
Returns:
|
|
358
468
|
Aggregated results dictionary in '{ subject: (correct, total, skipped) }' format.
|
|
359
469
|
"""
|
|
470
|
+
if dataset not in MMMU_DATASETS:
|
|
471
|
+
raise ValueError(f"Invalid dataset '{dataset}'")
|
|
472
|
+
|
|
360
473
|
if subjects is None:
|
|
361
|
-
subjects = MMMU_SUBJECTS
|
|
474
|
+
subjects = MMMU_SUBJECTS[dataset]
|
|
475
|
+
|
|
476
|
+
eval_split = "validation" if dataset == "MMMU/MMMU" else "test"
|
|
477
|
+
few_shot_split = "test"
|
|
362
478
|
|
|
363
479
|
# { subject: (correct, total) }
|
|
364
480
|
results: dict[str, tuple[int, int, int]] = {}
|
|
@@ -370,6 +486,9 @@ def evaluate_mmmu(
|
|
|
370
486
|
correct, total, skipped = evaluate_subject(
|
|
371
487
|
model=model,
|
|
372
488
|
processor=processor,
|
|
489
|
+
dataset=dataset,
|
|
490
|
+
eval_split=eval_split,
|
|
491
|
+
few_shot_split=few_shot_split,
|
|
373
492
|
subject=subject,
|
|
374
493
|
device=device,
|
|
375
494
|
n_shots=n_shots,
|
{tico-0.2.0.dev260520 → tico-0.2.0.dev260522}/tico/quantization/evaluation/vlm_eval_utils.py
RENAMED
|
@@ -423,6 +423,85 @@ def generate_answer(
|
|
|
423
423
|
return processor.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
|
|
424
424
|
|
|
425
425
|
|
|
426
|
+
@torch.no_grad()
|
|
427
|
+
def generate_image_only_answer(
|
|
428
|
+
model,
|
|
429
|
+
processor,
|
|
430
|
+
image,
|
|
431
|
+
device: str | torch.device,
|
|
432
|
+
question: str | None = None,
|
|
433
|
+
max_new_tokens: int = 16,
|
|
434
|
+
temperature: float = 0.0,
|
|
435
|
+
max_seq_len: int | None = None,
|
|
436
|
+
) -> str:
|
|
437
|
+
"""
|
|
438
|
+
Generate an answer from the image only.
|
|
439
|
+
|
|
440
|
+
Args:
|
|
441
|
+
model: Vision-language generation model.
|
|
442
|
+
processor: Matching processor for the model.
|
|
443
|
+
image: Input image.
|
|
444
|
+
question: Optional text question.
|
|
445
|
+
device: Device on which generation should run.
|
|
446
|
+
max_new_tokens: Maximum number of generated tokens.
|
|
447
|
+
temperature: Sampling temperature. Greedy decoding is used when this
|
|
448
|
+
value is less than or equal to zero.
|
|
449
|
+
max_seq_len: Optional maximum text sequence length for processor
|
|
450
|
+
tokenization.
|
|
451
|
+
|
|
452
|
+
Returns:
|
|
453
|
+
The decoded model answer string.
|
|
454
|
+
"""
|
|
455
|
+
content: list = [{"type": "image"}]
|
|
456
|
+
|
|
457
|
+
if question is not None:
|
|
458
|
+
content.append(
|
|
459
|
+
{
|
|
460
|
+
"type": "text",
|
|
461
|
+
"text": question,
|
|
462
|
+
}
|
|
463
|
+
)
|
|
464
|
+
|
|
465
|
+
messages = [
|
|
466
|
+
{
|
|
467
|
+
"role": "user",
|
|
468
|
+
"content": content,
|
|
469
|
+
}
|
|
470
|
+
]
|
|
471
|
+
|
|
472
|
+
prompt = processor.apply_chat_template(
|
|
473
|
+
messages,
|
|
474
|
+
tokenize=False,
|
|
475
|
+
add_generation_prompt=True,
|
|
476
|
+
)
|
|
477
|
+
|
|
478
|
+
processor_kwargs: dict[str, Any] = {
|
|
479
|
+
"text": prompt,
|
|
480
|
+
"images": image,
|
|
481
|
+
"return_tensors": "pt",
|
|
482
|
+
}
|
|
483
|
+
if max_seq_len is not None and max_seq_len > 0:
|
|
484
|
+
processor_kwargs["truncation"] = True
|
|
485
|
+
processor_kwargs["max_length"] = max_seq_len
|
|
486
|
+
|
|
487
|
+
inputs = processor(**processor_kwargs)
|
|
488
|
+
inputs = move_inputs_to_device(inputs, device)
|
|
489
|
+
|
|
490
|
+
do_sample = temperature > 0.0
|
|
491
|
+
gen_kwargs: dict[str, Any] = {
|
|
492
|
+
"max_new_tokens": max_new_tokens,
|
|
493
|
+
"do_sample": do_sample,
|
|
494
|
+
}
|
|
495
|
+
if do_sample:
|
|
496
|
+
gen_kwargs["temperature"] = temperature
|
|
497
|
+
|
|
498
|
+
out_ids = model.generate(**inputs, **gen_kwargs)
|
|
499
|
+
input_len = inputs["input_ids"].shape[1]
|
|
500
|
+
gen_ids = out_ids[0, input_len:]
|
|
501
|
+
|
|
502
|
+
return processor.tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
|
|
503
|
+
|
|
504
|
+
|
|
426
505
|
class CocoResult(TypedDict):
|
|
427
506
|
image_id: str
|
|
428
507
|
caption: str
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
|
|
17
|
+
import torch
|
|
18
|
+
|
|
19
|
+
from tico.quantization.recipes.adapters import get_adapter
|
|
20
|
+
from tico.quantization.recipes.config import load_recipe_config
|
|
21
|
+
from tico.quantization.recipes.context import RecipeContext
|
|
22
|
+
from tico.quantization.recipes.utils import set_seed
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_args() -> argparse.Namespace:
|
|
26
|
+
parser = argparse.ArgumentParser(
|
|
27
|
+
description="Evaluate an FP or fake-quant checkpoint."
|
|
28
|
+
)
|
|
29
|
+
parser.add_argument("--config", required=True, help="Base recipe config.")
|
|
30
|
+
parser.add_argument(
|
|
31
|
+
"--checkpoint", default=None, help="Optional torch checkpoint to evaluate."
|
|
32
|
+
)
|
|
33
|
+
parser.add_argument("--model", default=None, help="Override model.name_or_path.")
|
|
34
|
+
parser.add_argument("--device", default=None, help="Override runtime.device.")
|
|
35
|
+
parser.add_argument(
|
|
36
|
+
"--tasks",
|
|
37
|
+
default=None,
|
|
38
|
+
help="Override eval tasks. LLM: lm_eval_tasks, VLM: vlm_tasks.",
|
|
39
|
+
)
|
|
40
|
+
parser.add_argument("--set", action="append", default=[], metavar="KEY=VALUE")
|
|
41
|
+
return parser.parse_args()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def main() -> None:
|
|
45
|
+
args = parse_args()
|
|
46
|
+
overrides = list(args.set)
|
|
47
|
+
overrides.append("evaluation.enabled=true")
|
|
48
|
+
if args.model:
|
|
49
|
+
overrides.append(f"model.name_or_path={args.model}")
|
|
50
|
+
if args.device:
|
|
51
|
+
overrides.append(f"runtime.device={args.device}")
|
|
52
|
+
|
|
53
|
+
cfg = load_recipe_config(args.config, overrides=overrides)
|
|
54
|
+
set_seed(cfg.get("runtime", {}).get("seed", 42))
|
|
55
|
+
adapter = get_adapter(cfg["model"]["family"])
|
|
56
|
+
ctx = RecipeContext(cfg=cfg, adapter=adapter)
|
|
57
|
+
ctx = adapter.load_model(ctx)
|
|
58
|
+
|
|
59
|
+
if args.checkpoint:
|
|
60
|
+
ctx.model = torch.load(args.checkpoint, weights_only=False).eval()
|
|
61
|
+
|
|
62
|
+
if args.tasks:
|
|
63
|
+
if adapter.family == "llama":
|
|
64
|
+
cfg.setdefault("evaluation", {})["lm_eval_tasks"] = args.tasks
|
|
65
|
+
else:
|
|
66
|
+
cfg.setdefault("evaluation", {})["vlm_tasks"] = [
|
|
67
|
+
t.strip() for t in args.tasks.split(",") if t.strip()
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
adapter.evaluate(ctx)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
if __name__ == "__main__":
|
|
74
|
+
main()
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
|
|
17
|
+
from tico.quantization.recipes.adapters import get_adapter
|
|
18
|
+
from tico.quantization.recipes.config import load_recipe_config
|
|
19
|
+
from tico.quantization.recipes.context import RecipeContext
|
|
20
|
+
from tico.quantization.recipes.debug.static_llama_runtime import (
|
|
21
|
+
run_static_llama_runtime,
|
|
22
|
+
StaticLlamaRuntimeConfig,
|
|
23
|
+
)
|
|
24
|
+
from tico.quantization.recipes.debug.tied_embedding import (
|
|
25
|
+
run_tied_embedding_smoke,
|
|
26
|
+
TiedEmbeddingSmokeConfig,
|
|
27
|
+
)
|
|
28
|
+
from tico.quantization.recipes.debug.trace import trace_ptq_parity
|
|
29
|
+
from tico.quantization.recipes.utils import set_seed
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_args() -> argparse.Namespace:
|
|
33
|
+
parser = argparse.ArgumentParser(description="Inspect/debug quantization recipes.")
|
|
34
|
+
parser.add_argument("--config", required=True, help="Recipe config.")
|
|
35
|
+
parser.add_argument(
|
|
36
|
+
"--mode",
|
|
37
|
+
choices=["trace", "static-llama-runtime", "tied-embedding-smoke"],
|
|
38
|
+
default="trace",
|
|
39
|
+
)
|
|
40
|
+
parser.add_argument("--model", default=None, help="Override model.name_or_path.")
|
|
41
|
+
parser.add_argument("--device", default=None, help="Override runtime.device.")
|
|
42
|
+
parser.add_argument("--enable-quantization", action="store_true")
|
|
43
|
+
parser.add_argument("--interesting-modules", nargs="*", default=[])
|
|
44
|
+
parser.add_argument("--set", action="append", default=[], metavar="KEY=VALUE")
|
|
45
|
+
return parser.parse_args()
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def main() -> None:
|
|
49
|
+
args = parse_args()
|
|
50
|
+
overrides = list(args.set)
|
|
51
|
+
if args.model:
|
|
52
|
+
overrides.append(f"model.name_or_path={args.model}")
|
|
53
|
+
if args.device:
|
|
54
|
+
overrides.append(f"runtime.device={args.device}")
|
|
55
|
+
|
|
56
|
+
cfg = load_recipe_config(args.config, overrides=overrides)
|
|
57
|
+
set_seed(cfg.get("runtime", {}).get("seed", 42))
|
|
58
|
+
|
|
59
|
+
if args.mode == "tied-embedding-smoke":
|
|
60
|
+
smoke_cfg = TiedEmbeddingSmokeConfig(
|
|
61
|
+
**cfg.get("debug", {}).get("tied_embedding", {})
|
|
62
|
+
)
|
|
63
|
+
run_tied_embedding_smoke(smoke_cfg)
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
if args.mode == "static-llama-runtime":
|
|
67
|
+
runtime_cfg = StaticLlamaRuntimeConfig(
|
|
68
|
+
**cfg.get("debug", {}).get("static_llama_runtime", {})
|
|
69
|
+
)
|
|
70
|
+
run_static_llama_runtime(runtime_cfg)
|
|
71
|
+
return
|
|
72
|
+
|
|
73
|
+
adapter = get_adapter(cfg["model"]["family"])
|
|
74
|
+
ctx = RecipeContext(cfg=cfg, adapter=adapter)
|
|
75
|
+
ctx = adapter.load_model(ctx)
|
|
76
|
+
ctx.calibration_inputs = adapter.build_calibration_inputs(ctx)
|
|
77
|
+
|
|
78
|
+
if args.mode == "trace":
|
|
79
|
+
trace_ptq_parity(
|
|
80
|
+
ctx,
|
|
81
|
+
enable_quantization=args.enable_quantization,
|
|
82
|
+
interesting_modules=args.interesting_modules,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
if __name__ == "__main__":
|
|
87
|
+
main()
|