tico 0.2.0.dev260331__tar.gz → 0.2.0.dev260403__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (323)
  1. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/PKG-INFO +1 -1
  2. tico-0.2.0.dev260403/tico/_version.py +1 -0
  3. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/gptq/quantizer.py +34 -5
  4. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/builders.py +26 -3
  5. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/gptq.py +22 -1
  6. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/ptq.py +197 -6
  7. tico-0.2.0.dev260403/tico/quantization/config/utils.py +41 -0
  8. tico-0.2.0.dev260403/tico/quantization/wrapq/examples/qwen/quantize_model.py +357 -0
  9. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_attn_decode.py +1 -2
  10. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_attn_prefill.py +1 -2
  11. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +9 -1
  12. tico-0.2.0.dev260403/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +636 -0
  13. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +2 -1
  14. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +2 -1
  15. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/registry.py +1 -0
  16. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/PKG-INFO +1 -1
  17. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/SOURCES.txt +3 -0
  18. tico-0.2.0.dev260331/tico/_version.py +0 -1
  19. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/LICENSE +0 -0
  20. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/README.md +0 -0
  21. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/pyproject.toml +0 -0
  22. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/setup.cfg +0 -0
  23. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/__init__.py +0 -0
  24. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/config/__init__.py +0 -0
  25. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/config/base.py +0 -0
  26. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/config/factory.py +0 -0
  27. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/config/v1.py +0 -0
  28. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/experimental/__init__.py +0 -0
  29. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/interpreter/__init__.py +0 -0
  30. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/interpreter/infer.py +0 -0
  31. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/interpreter/interpreter.py +0 -0
  32. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/__init__.py +0 -0
  33. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/cast_aten_where_arg_type.py +0 -0
  34. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
  35. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/cast_mixed_type_args.py +0 -0
  36. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/const_prop_pass.py +0 -0
  37. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
  38. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
  39. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_expand_to_slice_cat.py +0 -0
  40. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_layout_op_to_reshape.py +0 -0
  41. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_matmul_to_linear.py +0 -0
  42. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
  43. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
  44. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/convert_to_relu6.py +0 -0
  45. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_addmm.py +0 -0
  46. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_batch_norm.py +0 -0
  47. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_fake_quantize.py +0 -0
  48. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
  49. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_group_norm.py +0 -0
  50. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_grouped_conv2d.py +0 -0
  51. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/decompose_slice_scatter.py +0 -0
  52. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/extract_dtype_kwargs.py +0 -0
  53. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/fill_meta_val.py +0 -0
  54. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
  55. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
  56. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/legalize_causal_mask_value.py +0 -0
  57. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/legalize_predefined_layout_operators.py +0 -0
  58. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/lower_copy.py +0 -0
  59. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/lower_pow2_to_mul.py +0 -0
  60. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
  61. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/lower_to_slice.py +0 -0
  62. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/merge_consecutive_cat.py +0 -0
  63. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/ops.py +0 -0
  64. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_nop.py +0 -0
  65. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_assert_nodes.py +0 -0
  66. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_expand.py +0 -0
  67. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_permute.py +0 -0
  68. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_reshape.py +0 -0
  69. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_slice.py +0 -0
  70. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/remove_redundant_to_copy.py +0 -0
  71. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/restore_linear.py +0 -0
  72. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/passes/segment_index_select.py +0 -0
  73. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/pt2_to_circle.py +0 -0
  74. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/__init__.py +0 -0
  75. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/__init__.py +0 -0
  76. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
  77. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
  78. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
  79. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/gptq/__init__.py +0 -0
  80. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/gptq/gptq.py +0 -0
  81. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/gptq/quant.py +0 -0
  82. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/gptq/utils.py +0 -0
  83. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
  84. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
  85. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
  86. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
  87. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
  88. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
  89. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
  90. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
  91. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/__init__.py +0 -0
  92. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/base.py +0 -0
  93. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/fpi_gptq.py +0 -0
  94. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
  95. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/config/smoothquant.py +0 -0
  96. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/__init__.py +0 -0
  97. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/backend.py +0 -0
  98. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/evaluate.py +0 -0
  99. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/executor/__init__.py +0 -0
  100. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
  101. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
  102. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
  103. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/metric.py +0 -0
  104. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
  105. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
  106. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/utils.py +0 -0
  107. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
  108. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/__init__.py +0 -0
  109. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/fold_quant_ops.py +0 -0
  110. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  111. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
  112. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
  113. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/quantize_bias.py +0 -0
  114. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
  115. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/public_interface.py +0 -0
  116. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/quantizer.py +0 -0
  117. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/quantizer_registry.py +0 -0
  118. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/__init__.py +0 -0
  119. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/dtypes.py +0 -0
  120. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/__init__.py +0 -0
  121. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
  122. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
  123. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
  124. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
  125. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
  126. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
  127. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
  128. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
  129. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
  130. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
  131. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
  132. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
  133. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +0 -0
  134. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
  135. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
  136. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
  137. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
  138. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
  139. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
  140. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
  141. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
  142. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
  143. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
  144. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
  145. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
  146. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
  147. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
  148. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
  149. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/mode.py +0 -0
  150. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/__init__.py +0 -0
  151. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/affine_base.py +0 -0
  152. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/base.py +0 -0
  153. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/ema.py +0 -0
  154. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/identity.py +0 -0
  155. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/minmax.py +0 -0
  156. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/observers/mx.py +0 -0
  157. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/qscheme.py +0 -0
  158. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/quantizer.py +0 -0
  159. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/__init__.py +0 -0
  160. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
  161. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/introspection.py +0 -0
  162. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/metrics.py +0 -0
  163. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
  164. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/utils/version.py +0 -0
  165. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrap_helper.py +0 -0
  166. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
  167. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
  168. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
  169. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
  170. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
  171. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
  172. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
  173. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
  174. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
  175. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer_decode.py +0 -0
  176. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer_prefill.py +0 -0
  177. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
  178. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
  179. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
  180. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
  181. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
  182. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
  183. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
  184. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
  185. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
  186. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
  187. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
  188. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
  189. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
  190. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
  191. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
  192. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
  193. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
  194. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
  195. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
  196. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
  197. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
  198. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
  199. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/__init__.py +0 -0
  200. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/circle_graph.py +0 -0
  201. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/circle_mapping.py +0 -0
  202. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/circle_serializer.py +0 -0
  203. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/__init__.py +0 -0
  204. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/adapters/__init__.py +0 -0
  205. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
  206. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
  207. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
  208. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/hashable_opcode.py +0 -0
  209. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/node_visitor.py +0 -0
  210. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_abs.py +0 -0
  211. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_add.py +0 -0
  212. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_alias_copy.py +0 -0
  213. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_any.py +0 -0
  214. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_arange_start_step.py +0 -0
  215. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_argmax.py +0 -0
  216. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_attention.py +0 -0
  217. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_avg_pool2d.py +0 -0
  218. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_bmm.py +0 -0
  219. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_cat.py +0 -0
  220. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_circle_shape.py +0 -0
  221. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_clamp.py +0 -0
  222. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_clone.py +0 -0
  223. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
  224. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_conv2d.py +0 -0
  225. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_cos.py +0 -0
  226. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_cumsum.py +0 -0
  227. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
  228. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
  229. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
  230. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_div.py +0 -0
  231. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_embedding.py +0 -0
  232. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_eq.py +0 -0
  233. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_exp.py +0 -0
  234. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_expand.py +0 -0
  235. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_full.py +0 -0
  236. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_full_like.py +0 -0
  237. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_ge.py +0 -0
  238. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_gelu.py +0 -0
  239. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_gt.py +0 -0
  240. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_index.py +0 -0
  241. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_index_select.py +0 -0
  242. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_instance_norm.py +0 -0
  243. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_le.py +0 -0
  244. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_leaky_relu.py +0 -0
  245. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_linear.py +0 -0
  246. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_log.py +0 -0
  247. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_log1p.py +0 -0
  248. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_logical_and.py +0 -0
  249. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_logical_not.py +0 -0
  250. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_lt.py +0 -0
  251. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_max_dim.py +0 -0
  252. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
  253. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_maximum.py +0 -0
  254. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_mean.py +0 -0
  255. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_minimum.py +0 -0
  256. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_mm.py +0 -0
  257. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_mul.py +0 -0
  258. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_ne.py +0 -0
  259. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_neg.py +0 -0
  260. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_permute.py +0 -0
  261. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_pow.py +0 -0
  262. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_prelu.py +0 -0
  263. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
  264. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_reciprocal.py +0 -0
  265. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_relu.py +0 -0
  266. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_relu6.py +0 -0
  267. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_repeat.py +0 -0
  268. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_reshape.py +0 -0
  269. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
  270. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_rmsnorm.py +0 -0
  271. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_round.py +0 -0
  272. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_rsqrt.py +0 -0
  273. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_scalar_tensor.py +0 -0
  274. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_select_copy.py +0 -0
  275. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_sigmoid.py +0 -0
  276. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_sin.py +0 -0
  277. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_slice.py +0 -0
  278. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_softmax.py +0 -0
  279. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_split_with_sizes.py +0 -0
  280. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_sqrt.py +0 -0
  281. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_squeeze.py +0 -0
  282. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_sub.py +0 -0
  283. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_sum.py +0 -0
  284. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_tanh.py +0 -0
  285. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_to_copy.py +0 -0
  286. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_transpose_conv.py +0 -0
  287. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_unsqueeze.py +0 -0
  288. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_view.py +0 -0
  289. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/op_where.py +0 -0
  290. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/operators/utils.py +0 -0
  291. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/pack.py +0 -0
  292. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/serialize/quant_param.py +0 -0
  293. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/__init__.py +0 -0
  294. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/compat/__init__.py +0 -0
  295. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/compat/torch.py +0 -0
  296. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/compat/transformers.py +0 -0
  297. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/convert.py +0 -0
  298. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/define.py +0 -0
  299. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/diff_graph.py +0 -0
  300. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/dtype.py +0 -0
  301. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/errors.py +0 -0
  302. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/graph.py +0 -0
  303. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/installed_packages.py +0 -0
  304. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/logging.py +0 -0
  305. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/model.py +0 -0
  306. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/mx/__init__.py +0 -0
  307. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/mx/elemwise_ops.py +0 -0
  308. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/mx/formats.py +0 -0
  309. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/mx/mx_ops.py +0 -0
  310. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/padding.py +0 -0
  311. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/passes.py +0 -0
  312. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/pytree_utils.py +0 -0
  313. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/record_input.py +0 -0
  314. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/register_custom_op.py +0 -0
  315. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/serialize.py +0 -0
  316. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/signature.py +0 -0
  317. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/trace_decorators.py +0 -0
  318. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/utils.py +0 -0
  319. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico/utils/validate_args_kwargs.py +0 -0
  320. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/dependency_links.txt +0 -0
  321. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/entry_points.txt +0 -0
  322. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/requires.txt +0 -0
  323. {tico-0.2.0.dev260331 → tico-0.2.0.dev260403}/tico.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tico
3
- Version: 0.2.0.dev260331
3
+ Version: 0.2.0.dev260403
4
4
  Summary: Convert Exported Torch Module To Circle
5
5
  License: This file provides full text of licenses used in this project
6
6
 
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0.dev260403"
@@ -60,6 +60,31 @@ class GPTQQuantizer(BaseQuantizer):
60
60
  self._orig_layer_forward: Optional[Callable[..., Any]] = None
61
61
  self._first_layer_ref: Optional[torch.nn.Module] = None
62
62
 
63
+ def _resolve_weight_bits(
64
+ self,
65
+ gptq_conf: GPTQConfig,
66
+ *,
67
+ full_module_name: str,
68
+ local_module_name: str,
69
+ ) -> int:
70
+ """Resolve the effective bit-width for a quantized submodule."""
71
+ if full_module_name in gptq_conf.weight_bits_overrides:
72
+ return gptq_conf.weight_bits_overrides[full_module_name]
73
+
74
+ if local_module_name in gptq_conf.weight_bits_overrides:
75
+ return gptq_conf.weight_bits_overrides[local_module_name]
76
+
77
+ suffix_matches = [
78
+ bits
79
+ for pattern, bits in gptq_conf.weight_bits_overrides.items()
80
+ if full_module_name.endswith(f".{pattern}")
81
+ ]
82
+
83
+ if suffix_matches:
84
+ return suffix_matches[-1]
85
+
86
+ return gptq_conf.weight_bits
87
+
63
88
  @torch.no_grad()
64
89
  def prepare(
65
90
  self,
@@ -220,18 +245,22 @@ class GPTQQuantizer(BaseQuantizer):
220
245
  gptq: Dict[str, GPTQ] = {}
221
246
  for name in subset:
222
247
  gptq[name] = GPTQ(subset[name])
248
+ full_module_name = module_name[subset[name]]
249
+ weight_bits = self._resolve_weight_bits(
250
+ gptq_conf,
251
+ full_module_name=full_module_name,
252
+ local_module_name=name,
253
+ )
223
254
  if (
224
255
  gptq_conf.sensitivity is not None
225
256
  and isinstance(gptq_conf.sensitivity, dict)
226
- and module_name[subset[name]] in gptq_conf.sensitivity
257
+ and full_module_name in gptq_conf.sensitivity
227
258
  ):
228
- cur_sensitivity = gptq_conf.sensitivity[
229
- module_name[subset[name]]
230
- ]
259
+ cur_sensitivity = gptq_conf.sensitivity[full_module_name]
231
260
  else:
232
261
  cur_sensitivity = None
233
262
  gptq[name].quantizer.configure(
234
- bits=gptq_conf.weight_bits,
263
+ bits=weight_bits,
235
264
  perchannel=gptq_conf.perchannel,
236
265
  sym=gptq_conf.symmetric,
237
266
  mse=gptq_conf.mse,
@@ -13,10 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import copy
16
- from typing import Any, Dict, Optional, Tuple
16
+ from typing import Any, Dict, Optional, Tuple, Type
17
17
 
18
18
  from tico.quantization.config.ptq import PTQConfig, WrapperVariant
19
+ from tico.quantization.config.utils import auto_qscheme_for
19
20
  from tico.quantization.wrapq.dtypes import DType
21
+ from tico.quantization.wrapq.observers.base import ObserverBase
22
+ from tico.quantization.wrapq.observers.minmax import MinMaxObserver
20
23
  from tico.quantization.wrapq.qscheme import QScheme
21
24
 
22
25
 
@@ -121,6 +124,10 @@ def _build_weight_override(weight_dtype: Optional[DType]) -> Dict[str, Any]:
121
124
  """
122
125
  Build a weight override dictionary.
123
126
 
127
+ The override explicitly carries both dtype and qscheme so that local dtype
128
+ changes do not accidentally inherit an incompatible or less suitable
129
+ qscheme from an outer scope.
130
+
124
131
  Parameters
125
132
  ----------
126
133
  weight_dtype : Optional[DType]
@@ -134,7 +141,12 @@ def _build_weight_override(weight_dtype: Optional[DType]) -> Dict[str, Any]:
134
141
  """
135
142
  if weight_dtype is None:
136
143
  return {}
137
- return {"weight": {"dtype": weight_dtype}}
144
+ return {
145
+ "weight": {
146
+ "dtype": weight_dtype,
147
+ "qscheme": auto_qscheme_for(weight_dtype, "weight"),
148
+ }
149
+ }
138
150
 
139
151
 
140
152
  def _build_norm_override(
@@ -162,9 +174,13 @@ def _build_norm_override(
162
174
 
163
175
  if norm_dtype is not None:
164
176
  override["dtype"] = norm_dtype
177
+ override["qscheme"] = auto_qscheme_for(norm_dtype)
165
178
 
166
179
  if norm_weight_dtype is not None:
167
- override["weight"] = {"dtype": norm_weight_dtype}
180
+ override["weight"] = {
181
+ "dtype": norm_weight_dtype,
182
+ "qscheme": auto_qscheme_for(norm_weight_dtype, "weight"),
183
+ }
168
184
 
169
185
  return override
170
186
 
@@ -309,6 +325,7 @@ def build_llm_ptq_config(
309
325
  wrapper_variant: WrapperVariant = "prefill",
310
326
  activation_dtype: DType = DType.int(16),
311
327
  default_qscheme: QScheme = QScheme.PER_TENSOR_SYMM,
328
+ default_observer: Type[ObserverBase] = MinMaxObserver,
312
329
  linear_weight_bits: Optional[int] = None,
313
330
  linear_weight_dtype: Optional[DType] = None,
314
331
  embedding_weight_bits: Optional[int] = None,
@@ -346,6 +363,11 @@ def build_llm_ptq_config(
346
363
  default_qscheme : QScheme, default=QScheme.PER_TENSOR_SYMM
347
364
  Default quantization scheme for observers that do not receive an
348
365
  explicit override.
366
+ default_observer : Type[ObserverBase], default=MinMaxObserver
367
+ Observer class to instantiate when no explicit observer is provided
368
+ via overrides.
369
+ This should be a subclass of `ObserverBase` (e.g., MinMaxObserver,
370
+ EMAObserver). The class itself (not an instance) must be passed.
349
371
  linear_weight_bits : Optional[int], default=None
350
372
  Convenience bit-width for decoder-layer linear projection weights.
351
373
  Used only when `linear_weight_dtype` is not provided.
@@ -416,6 +438,7 @@ def build_llm_ptq_config(
416
438
  return PTQConfig(
417
439
  default_dtype=activation_dtype,
418
440
  default_qscheme=default_qscheme,
441
+ default_observer=default_observer,
419
442
  wrapper_variant=wrapper_variant,
420
443
  overrides=overrides,
421
444
  strict_wrap=strict_wrap,
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from dataclasses import dataclass
15
+ from dataclasses import dataclass, field
16
16
 
17
17
  import torch
18
18
 
@@ -23,6 +23,21 @@ from tico.quantization.config.base import BaseConfig
23
23
  class GPTQConfig(BaseConfig):
24
24
  """
25
25
  Configuration for GPTQ weight quantization.
26
+
27
+ Attributes
28
+ ----------
29
+ weight_bits : int
30
+ Default bit-width applied to quantized weights.
31
+ weight_bits_overrides : dict[str, int]
32
+ Optional per-module bit-width overrides.
33
+
34
+ Supported keys are matched in the following order:
35
+ 1) Full module name, for example `model.layers.0.self_attn.o_proj`
36
+ 2) Layer-local module name, for example `self_attn.o_proj`
37
+ 3) Full-name suffix, for example `self_attn.o_proj` or `down_proj`
38
+
39
+ This makes it possible to keep a default bit-width for most modules
40
+ while selectively increasing precision for specific projections.
26
41
  """
27
42
 
28
43
  # general
@@ -31,6 +46,7 @@ class GPTQConfig(BaseConfig):
31
46
 
32
47
  # quantizer.configure params (weight quantization spec)
33
48
  weight_bits: int = 8
49
+ weight_bits_overrides: dict[str, int] = field(default_factory=dict)
34
50
  perchannel: bool = True
35
51
  symmetric: bool = False
36
52
  mse: str | None = None
@@ -49,6 +65,11 @@ class GPTQConfig(BaseConfig):
49
65
  def validate(self) -> None:
50
66
  if self.weight_bits <= 0:
51
67
  raise ValueError(f"weight_bits must be positive. got {self.weight_bits}")
68
+ for module_name, bits in self.weight_bits_overrides.items():
69
+ if bits <= 0:
70
+ raise ValueError(
71
+ f"weight_bits_overrides[{module_name!r}] must be positive. got {bits}"
72
+ )
52
73
  if self.groupsize != -1 and self.groupsize <= 0:
53
74
  raise ValueError(f"groupsize must be -1 or positive. got {self.groupsize}")
54
75
  if not (0.0 < self.percdamp <= 1.0):
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from copy import deepcopy
15
16
  from dataclasses import dataclass, field
16
- from typing import Any, Dict, Literal, Mapping, Type
17
+ from typing import Any, Dict, Iterable, Literal, Mapping, MutableMapping, Optional, Type
17
18
 
18
19
  from tico.quantization.config.base import BaseConfig
20
+ from tico.quantization.config.utils import auto_qscheme_for, dtype_is_unsigned
19
21
  from tico.quantization.wrapq.dtypes import DType
20
22
  from tico.quantization.wrapq.observers.base import ObserverBase
21
23
  from tico.quantization.wrapq.observers.minmax import MinMaxObserver
@@ -24,6 +26,92 @@ from tico.quantization.wrapq.qscheme import QScheme
24
26
  WrapperVariant = Literal["common", "prefill", "decode"]
25
27
 
26
28
 
29
+ def _resolve_qscheme(
30
+ *,
31
+ dtype: DType,
32
+ qscheme: Optional[QScheme],
33
+ context: str,
34
+ obs_name: Optional[str] = None,
35
+ ) -> QScheme:
36
+ """
37
+ Resolve a dtype/qscheme pair using the option-C policy.
38
+
39
+ Resolution policy:
40
+ 1. If `qscheme` is None, infer it from `dtype` and `obs_name`.
41
+ 2. If the caller explicitly provides an incompatible pair, raise.
42
+ """
43
+ resolved_qscheme = qscheme or auto_qscheme_for(dtype, obs_name)
44
+
45
+ if dtype_is_unsigned(dtype) and resolved_qscheme.is_symmetric():
46
+ raise ValueError(
47
+ f"Invalid quantization config at {context}: unsigned dtype "
48
+ f"{dtype!r} cannot be paired with symmetric qscheme "
49
+ f"{resolved_qscheme!r}."
50
+ )
51
+
52
+ return resolved_qscheme
53
+
54
+
55
+ def _normalize_overrides(
56
+ mapping: Mapping[str, Any],
57
+ *,
58
+ inherited_dtype: DType,
59
+ inherited_qscheme: QScheme,
60
+ context: str,
61
+ current_name: Optional[str] = None,
62
+ ) -> Dict[str, Any]:
63
+ """
64
+ Recursively normalize and validate nested override mappings.
65
+
66
+ Any node that provides `dtype` but omits `qscheme` receives an inferred
67
+ qscheme derived from that dtype. Explicit incompatible pairs are rejected
68
+ immediately.
69
+
70
+ The current mapping key is tracked as `current_name` so that special
71
+ observer names such as `weight` can receive a more suitable automatic
72
+ default qscheme.
73
+ """
74
+ normalized: Dict[str, Any] = dict(mapping)
75
+
76
+ local_dtype = normalized.get("dtype", inherited_dtype)
77
+ local_qscheme = normalized.get("qscheme", inherited_qscheme)
78
+
79
+ if "dtype" in normalized:
80
+ normalized["qscheme"] = _resolve_qscheme(
81
+ dtype=local_dtype,
82
+ qscheme=normalized.get("qscheme"),
83
+ context=context,
84
+ obs_name=current_name,
85
+ )
86
+ local_qscheme = normalized["qscheme"]
87
+ elif "qscheme" in normalized:
88
+ local_qscheme = _resolve_qscheme(
89
+ dtype=local_dtype,
90
+ qscheme=normalized["qscheme"],
91
+ context=context,
92
+ obs_name=current_name,
93
+ )
94
+ else:
95
+ _resolve_qscheme(
96
+ dtype=local_dtype,
97
+ qscheme=local_qscheme,
98
+ context=context,
99
+ obs_name=current_name,
100
+ )
101
+
102
+ for key, value in list(normalized.items()):
103
+ if isinstance(value, Mapping):
104
+ normalized[key] = _normalize_overrides(
105
+ value,
106
+ inherited_dtype=local_dtype,
107
+ inherited_qscheme=local_qscheme,
108
+ context=f"{context}.{key}",
109
+ current_name=key,
110
+ )
111
+
112
+ return normalized
113
+
114
+
27
115
  @dataclass
28
116
  class PTQConfig(BaseConfig):
29
117
  """
@@ -38,10 +126,19 @@ class PTQConfig(BaseConfig):
38
126
  default_observer : Type[ObserverBase], optional
39
127
  Observer class to instantiate when the caller (or an override) does
40
128
  not provide a `observer` key.
41
- default_qscheme : QScheme
42
- Fallback quantization scheme (per-tensor / per-channel,
43
- asymmetric / symmetric) for observers that DO NOT receive an explicit
44
- override.
129
+ default_qscheme : Optional[QScheme]
130
+ Fallback quantization scheme for observers that do not receive an
131
+ explicit override.
132
+
133
+ When set to `None`, the qscheme is inferred automatically from the
134
+ effective dtype and, for special observer names such as `weight`,
135
+ from the observer role:
136
+ - unsigned activation-like dtype -> `QScheme.PER_TENSOR_ASYMM`
137
+ - unsigned weight dtype -> `QScheme.PER_CHANNEL_ASYMM`
138
+ - signed dtype -> `QScheme.PER_TENSOR_SYMM`
139
+
140
+ When explicitly provided, the pair is validated. Incompatible pairs,
141
+ such as unsigned dtype with symmetric qscheme, raise immediately.
45
142
  wrapper_variant : str
46
143
  Execution specialization used when resolving quantization wrappers.
47
144
 
@@ -114,17 +211,111 @@ class PTQConfig(BaseConfig):
114
211
 
115
212
  default_dtype: DType = DType.uint(8)
116
213
  default_observer: Type[ObserverBase] = MinMaxObserver # type: ignore[type-abstract]
117
- default_qscheme: QScheme = QScheme.PER_TENSOR_ASYMM
214
+ default_qscheme: Optional[QScheme] = None
118
215
  wrapper_variant: WrapperVariant = "common"
119
216
  overrides: Mapping[str, Mapping[str, Any]] = field(default_factory=dict)
120
217
  model_args: Mapping[str, Any] = field(default_factory=dict)
121
218
  # If True, any module that cannot be wrapped will raise.
122
219
  strict_wrap: bool = True
123
220
 
221
+ def __post_init__(self) -> None:
222
+ """
223
+ Resolve automatic qscheme defaults and validate nested overrides.
224
+ """
225
+ self.default_qscheme = _resolve_qscheme(
226
+ dtype=self.default_dtype,
227
+ qscheme=self.default_qscheme,
228
+ context="PTQConfig.default_qscheme",
229
+ )
230
+ self.normalize_overrides()
231
+
124
232
  @property
125
233
  def name(self) -> str:
126
234
  return "ptq"
127
235
 
236
+ def normalize_overrides(self) -> None:
237
+ """
238
+ Normalize and validate the entire override tree in-place.
239
+
240
+ This method is useful when callers directly mutate `self.overrides`
241
+ after construction and want to retroactively apply automatic qscheme
242
+ inference and compatibility checks.
243
+ """
244
+ assert self.default_qscheme is not None
245
+ self.overrides = _normalize_overrides(
246
+ self.overrides,
247
+ inherited_dtype=self.default_dtype,
248
+ inherited_qscheme=self.default_qscheme,
249
+ context="PTQConfig.overrides",
250
+ )
251
+
252
+ def set_override(
253
+ self,
254
+ path: Iterable[str],
255
+ value: Mapping[str, Any],
256
+ ) -> None:
257
+ """
258
+ Set a nested override and normalize only the affected subtree.
259
+
260
+ Parameters
261
+ ----------
262
+ path : Iterable[str]
263
+ Hierarchical path inside `self.overrides`.
264
+ Example: `("model", "layers", "0", "self_attn", "o_proj", "weight")`
265
+ value : Mapping[str, Any]
266
+ Override payload to assign at the target path.
267
+
268
+ Notes
269
+ -----
270
+ The inserted subtree is normalized immediately, so callers may provide
271
+ only `dtype` and rely on automatic qscheme inference.
272
+ """
273
+ keys = tuple(path)
274
+ if not keys:
275
+ raise ValueError("Override path must not be empty.")
276
+
277
+ root: MutableMapping[str, Any] = dict(self.overrides)
278
+ current: MutableMapping[str, Any] = root
279
+ parent_dtype = self.default_dtype
280
+ parent_qscheme = self.default_qscheme
281
+ context = "PTQConfig.overrides"
282
+
283
+ for key in keys[:-1]:
284
+ context = f"{context}.{key}"
285
+ next_value = current.get(key)
286
+ if isinstance(next_value, Mapping):
287
+ child = dict(next_value)
288
+ elif next_value is None:
289
+ child = {}
290
+ else:
291
+ raise ValueError(
292
+ f"Cannot create nested override under non-mapping node at {context}."
293
+ )
294
+
295
+ current[key] = child
296
+ current = child
297
+
298
+ local_dtype = current.get("dtype", parent_dtype)
299
+ parent_qscheme = _resolve_qscheme(
300
+ dtype=local_dtype,
301
+ qscheme=current.get("qscheme", parent_qscheme),
302
+ context=context,
303
+ obs_name=key,
304
+ )
305
+ parent_dtype = local_dtype
306
+
307
+ assert parent_qscheme is not None
308
+ leaf_key = keys[-1]
309
+ leaf_context = f"{context}.{leaf_key}"
310
+ current[leaf_key] = _normalize_overrides(
311
+ deepcopy(value),
312
+ inherited_dtype=parent_dtype,
313
+ inherited_qscheme=parent_qscheme,
314
+ context=leaf_context,
315
+ current_name=leaf_key,
316
+ )
317
+ self.overrides = root
318
+
128
319
  def get_kwargs(self, obs_name: str) -> Dict[str, Any]:
129
320
  """
130
321
  Return user-specified kwargs for *obs_name* inside **this** wrapper.
@@ -0,0 +1,41 @@
1
+ # Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ from tico.quantization.wrapq.dtypes import DType
18
+ from tico.quantization.wrapq.qscheme import QScheme
19
+
20
+
21
+ def dtype_is_unsigned(dtype: DType) -> bool:
22
+ """
23
+ Return True when the dtype is unsigned.
24
+ """
25
+ return not dtype.signed
26
+
27
+
28
+ def auto_qscheme_for(dtype: DType, obs_name: Optional[str] = None) -> QScheme:
29
+ """
30
+ Choose the default qscheme associated with a dtype and observer name.
31
+
32
+ Default policy:
33
+ - signed dtype -> symmetric per-tensor
34
+ - unsigned dtype -> asymmetric per-tensor
35
+ - unsigned weight -> asymmetric per-channel
36
+ """
37
+ if dtype_is_unsigned(dtype):
38
+ if obs_name == "weight":
39
+ return QScheme.PER_CHANNEL_ASYMM
40
+ return QScheme.PER_TENSOR_ASYMM
41
+ return QScheme.PER_TENSOR_SYMM