tico 0.2.0.dev260511__tar.gz → 0.2.0.dev260512__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/PKG-INFO +1 -1
  2. tico-0.2.0.dev260512/tico/_version.py +1 -0
  3. tico-0.2.0.dev260512/tico/passes/remove_unused_placeholder.py +130 -0
  4. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/builders.py +23 -5
  5. tico-0.2.0.dev260512/tico/quantization/config/llama_attention.py +209 -0
  6. tico-0.2.0.dev260512/tico/quantization/passes/quantize_bias.py +145 -0
  7. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +129 -66
  8. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +391 -90
  9. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +32 -5
  10. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_model.py +22 -1
  11. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/convert.py +2 -0
  12. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/PKG-INFO +1 -1
  13. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/SOURCES.txt +2 -1
  14. tico-0.2.0.dev260511/tico/_version.py +0 -1
  15. tico-0.2.0.dev260511/tico/quantization/passes/quantize_bias.py +0 -122
  16. tico-0.2.0.dev260511/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -257
  17. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/LICENSE +0 -0
  18. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/README.md +0 -0
  19. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/pyproject.toml +0 -0
  20. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/setup.cfg +0 -0
  21. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/__init__.py +0 -0
  22. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/__init__.py +0 -0
  23. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/base.py +0 -0
  24. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/factory.py +0 -0
  25. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/config/v1.py +0 -0
  26. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/experimental/__init__.py +0 -0
  27. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/__init__.py +0 -0
  28. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/infer.py +0 -0
  29. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/interpreter/interpreter.py +0 -0
  30. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/__init__.py +0 -0
  31. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_aten_where_arg_type.py +0 -0
  32. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
  33. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/cast_mixed_type_args.py +0 -0
  34. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/const_prop_pass.py +0 -0
  35. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
  36. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
  37. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_expand_to_slice_cat.py +0 -0
  38. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_layout_op_to_reshape.py +0 -0
  39. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_matmul_to_linear.py +0 -0
  40. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
  41. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
  42. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/convert_to_relu6.py +0 -0
  43. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_addmm.py +0 -0
  44. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_batch_norm.py +0 -0
  45. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_fake_quantize.py +0 -0
  46. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
  47. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_group_norm.py +0 -0
  48. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_grouped_conv2d.py +0 -0
  49. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/decompose_slice_scatter.py +0 -0
  50. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/eliminate_rank_round_trip_region.py +0 -0
  51. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/extract_dtype_kwargs.py +0 -0
  52. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fill_meta_val.py +0 -0
  53. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
  54. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
  55. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/legalize_causal_mask_value.py +0 -0
  56. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/legalize_predefined_layout_operators.py +0 -0
  57. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_copy.py +0 -0
  58. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_pow2_to_mul.py +0 -0
  59. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
  60. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/lower_to_slice.py +0 -0
  61. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/merge_consecutive_cat.py +0 -0
  62. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/ops.py +0 -0
  63. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_nop.py +0 -0
  64. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_assert_nodes.py +0 -0
  65. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_expand.py +0 -0
  66. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_permute.py +0 -0
  67. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_reshape.py +0 -0
  68. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_slice.py +0 -0
  69. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/remove_redundant_to_copy.py +0 -0
  70. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/restore_linear.py +0 -0
  71. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/passes/segment_index_select.py +0 -0
  72. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/pt2_to_circle.py +0 -0
  73. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/__init__.py +0 -0
  74. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/__init__.py +0 -0
  75. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/__init__.py +0 -0
  76. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/cle.py +0 -0
  77. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/cle/quantizer.py +0 -0
  78. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
  79. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
  80. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
  81. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/__init__.py +0 -0
  82. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/gptq.py +0 -0
  83. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/quant.py +0 -0
  84. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
  85. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/gptq/utils.py +0 -0
  86. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
  87. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
  88. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
  89. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
  90. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
  91. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
  92. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
  93. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
  94. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
  95. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
  96. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
  97. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
  98. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
  99. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
  100. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/__init__.py +0 -0
  101. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/base.py +0 -0
  102. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/cle.py +0 -0
  103. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/fpi_gptq.py +0 -0
  104. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/gptq.py +0 -0
  105. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/ptq.py +0 -0
  106. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
  107. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/smoothquant.py +0 -0
  108. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/spinquant.py +0 -0
  109. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/config/utils.py +0 -0
  110. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/__init__.py +0 -0
  111. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/backend.py +0 -0
  112. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/evaluate.py +0 -0
  113. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/__init__.py +0 -0
  114. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
  115. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
  116. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
  117. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/metric.py +0 -0
  118. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/mmlu_eval_utils.py +0 -0
  119. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
  120. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
  121. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/utils.py +0 -0
  122. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
  123. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/__init__.py +0 -0
  124. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/fold_quant_ops.py +0 -0
  125. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  126. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
  127. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
  128. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
  129. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/public_interface.py +0 -0
  130. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/quantizer.py +0 -0
  131. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/quantizer_registry.py +0 -0
  132. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/__init__.py +0 -0
  133. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/dtypes.py +0 -0
  134. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/__init__.py +0 -0
  135. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
  136. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
  137. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +0 -0
  138. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
  139. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_attention_decode.py +0 -0
  140. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_attention_prefill.py +0 -0
  141. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
  142. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
  143. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
  144. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
  145. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
  146. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
  147. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_layernorm.py +0 -0
  148. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
  149. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
  150. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
  151. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
  152. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
  153. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
  154. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_attention.py +0 -0
  155. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
  156. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
  157. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
  158. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_attention.py +0 -0
  159. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
  160. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
  161. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
  162. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
  163. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
  164. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/qwen/trace_qwen.py +0 -0
  165. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
  166. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/mode.py +0 -0
  167. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/__init__.py +0 -0
  168. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/affine_base.py +0 -0
  169. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/base.py +0 -0
  170. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/ema.py +0 -0
  171. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/identity.py +0 -0
  172. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/minmax.py +0 -0
  173. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/observers/mx.py +0 -0
  174. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/qscheme.py +0 -0
  175. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/quantizer.py +0 -0
  176. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/__init__.py +0 -0
  177. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
  178. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/introspection.py +0 -0
  179. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/metrics.py +0 -0
  180. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
  181. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/utils.py +0 -0
  182. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/utils/version.py +0 -0
  183. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrap_helper.py +0 -0
  184. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
  185. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
  186. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
  187. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
  188. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
  189. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
  190. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
  191. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
  192. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
  193. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
  194. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
  195. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
  196. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
  197. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
  198. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
  199. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
  200. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
  201. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
  202. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
  203. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
  204. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
  205. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
  206. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
  207. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
  208. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
  209. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +0 -0
  210. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attention.py +0 -0
  211. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
  212. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
  213. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
  214. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attention.py +0 -0
  215. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
  216. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
  217. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
  218. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
  219. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
  220. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/quantization/wrapq/wrappers/registry.py +0 -0
  221. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/__init__.py +0 -0
  222. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_graph.py +0 -0
  223. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_mapping.py +0 -0
  224. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/circle_serializer.py +0 -0
  225. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/__init__.py +0 -0
  226. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/__init__.py +0 -0
  227. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
  228. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
  229. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
  230. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/hashable_opcode.py +0 -0
  231. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/node_visitor.py +0 -0
  232. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_abs.py +0 -0
  233. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_add.py +0 -0
  234. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_alias_copy.py +0 -0
  235. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_any.py +0 -0
  236. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_arange_start_step.py +0 -0
  237. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_argmax.py +0 -0
  238. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_attention.py +0 -0
  239. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_avg_pool2d.py +0 -0
  240. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_bmm.py +0 -0
  241. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cat.py +0 -0
  242. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_circle_shape.py +0 -0
  243. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_clamp.py +0 -0
  244. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_clone.py +0 -0
  245. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
  246. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_conv2d.py +0 -0
  247. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cos.py +0 -0
  248. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_cumsum.py +0 -0
  249. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
  250. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
  251. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
  252. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_div.py +0 -0
  253. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_embedding.py +0 -0
  254. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_eq.py +0 -0
  255. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_exp.py +0 -0
  256. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_expand.py +0 -0
  257. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_full.py +0 -0
  258. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_full_like.py +0 -0
  259. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_ge.py +0 -0
  260. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_gelu.py +0 -0
  261. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_gt.py +0 -0
  262. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_index.py +0 -0
  263. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_index_select.py +0 -0
  264. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_instance_norm.py +0 -0
  265. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_le.py +0 -0
  266. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_leaky_relu.py +0 -0
  267. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_linear.py +0 -0
  268. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_log.py +0 -0
  269. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_log1p.py +0 -0
  270. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_logical_and.py +0 -0
  271. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_logical_not.py +0 -0
  272. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_lt.py +0 -0
  273. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_max_dim.py +0 -0
  274. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
  275. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_maximum.py +0 -0
  276. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mean.py +0 -0
  277. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_minimum.py +0 -0
  278. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mm.py +0 -0
  279. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_mul.py +0 -0
  280. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_ne.py +0 -0
  281. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_neg.py +0 -0
  282. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_permute.py +0 -0
  283. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_pow.py +0 -0
  284. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_prelu.py +0 -0
  285. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
  286. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_reciprocal.py +0 -0
  287. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_relu.py +0 -0
  288. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_relu6.py +0 -0
  289. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_repeat.py +0 -0
  290. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_reshape.py +0 -0
  291. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
  292. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_rmsnorm.py +0 -0
  293. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_round.py +0 -0
  294. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_rsqrt.py +0 -0
  295. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_scalar_tensor.py +0 -0
  296. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_select_copy.py +0 -0
  297. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sigmoid.py +0 -0
  298. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sin.py +0 -0
  299. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_slice.py +0 -0
  300. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_softmax.py +0 -0
  301. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_split_with_sizes.py +0 -0
  302. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sqrt.py +0 -0
  303. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_squeeze.py +0 -0
  304. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sub.py +0 -0
  305. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_sum.py +0 -0
  306. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_tanh.py +0 -0
  307. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_to_copy.py +0 -0
  308. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_transpose_conv.py +0 -0
  309. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_unsqueeze.py +0 -0
  310. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_view.py +0 -0
  311. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/op_where.py +0 -0
  312. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/operators/utils.py +0 -0
  313. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/pack.py +0 -0
  314. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/serialize/quant_param.py +0 -0
  315. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/__init__.py +0 -0
  316. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/__init__.py +0 -0
  317. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/torch.py +0 -0
  318. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/compat/transformers.py +0 -0
  319. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/define.py +0 -0
  320. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/diff_graph.py +0 -0
  321. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/dtype.py +0 -0
  322. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/errors.py +0 -0
  323. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/graph.py +0 -0
  324. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/installed_packages.py +0 -0
  325. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/logging.py +0 -0
  326. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/model.py +0 -0
  327. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/__init__.py +0 -0
  328. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/elemwise_ops.py +0 -0
  329. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/formats.py +0 -0
  330. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/mx/mx_ops.py +0 -0
  331. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/padding.py +0 -0
  332. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/passes.py +0 -0
  333. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/pytree_utils.py +0 -0
  334. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/record_input.py +0 -0
  335. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/register_custom_op.py +0 -0
  336. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/serialize.py +0 -0
  337. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/signature.py +0 -0
  338. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/trace_decorators.py +0 -0
  339. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/utils.py +0 -0
  340. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/validate_args_kwargs.py +0 -0
  341. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico/utils/version.py +0 -0
  342. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/dependency_links.txt +0 -0
  343. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/entry_points.txt +0 -0
  344. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/requires.txt +0 -0
  345. {tico-0.2.0.dev260511 → tico-0.2.0.dev260512}/tico.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tico
3
- Version: 0.2.0.dev260511
3
+ Version: 0.2.0.dev260512
4
4
  Summary: Convert Exported Torch Module To Circle
5
5
  License: This file provides full text of licenses used in this project
6
6
 
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0.dev260512"
@@ -0,0 +1,130 @@
1
+ # Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ import torch.fx
19
+
20
+ import torch
21
+ from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
22
+ from torch.export import ExportedProgram
23
+
24
+ from tico.utils import logging
25
+ from tico.utils.passes import PassBase, PassResult
26
+ from tico.utils.trace_decorators import (
27
+ trace_const_diff_on_pass,
28
+ trace_graph_diff_on_pass,
29
+ )
30
+
31
+
32
+ def _is_constant_placeholder(
33
+ exported_program: ExportedProgram,
34
+ node: "torch.fx.Node",
35
+ ) -> bool:
36
+ """
37
+ Return whether the given placeholder represents a lifted constant.
38
+
39
+ Parameters, buffers, and lifted tensor constants are treated as constant
40
+ placeholders because they are backed by ExportedProgram state instead of
41
+ runtime user inputs.
42
+ """
43
+
44
+ if node.op != "placeholder":
45
+ return False
46
+
47
+ return (
48
+ is_param(exported_program, node)
49
+ or is_buffer(exported_program, node)
50
+ or is_lifted_tensor_constant(exported_program, node)
51
+ )
52
+
53
+
54
+ def _remove_constant_placeholder(
55
+ exported_program: ExportedProgram,
56
+ node: "torch.fx.Node",
57
+ ) -> None:
58
+ """
59
+ Remove an unused constant placeholder from the graph and ExportedProgram state.
60
+
61
+ The graph signature is updated by the caller after all unused placeholders are
62
+ removed.
63
+ """
64
+
65
+ signature = exported_program.graph_signature
66
+
67
+ if name := signature.inputs_to_parameters.get(node.name, None):
68
+ exported_program.state_dict.pop(name, None)
69
+ elif name := signature.inputs_to_lifted_tensor_constants.get(node.name, None):
70
+ exported_program.constants.pop(name, None)
71
+ elif name := signature.inputs_to_buffers.get(node.name, None):
72
+ exported_program.constants.pop(name, None)
73
+ exported_program.state_dict.pop(name, None)
74
+
75
+ exported_program.graph.erase_node(node)
76
+
77
+
78
@trace_graph_diff_on_pass
@trace_const_diff_on_pass
class RemoveUnusedPlaceholder(PassBase):
    """
    Strip constant placeholders that nothing in the exported graph uses.

    FX dead-code elimination does not remove placeholder nodes even when they
    have no users. This pass erases user-less placeholders that correspond to
    parameters, buffers, or lifted tensor constants, drops their backing state,
    and rebuilds the graph signature's input specs to match the remaining
    placeholders.

    Placeholders for runtime user inputs are never removed by this pass.
    """

    def __init__(self) -> None:
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        """
        Remove unused constant placeholders from `exported_program` in place.

        Returns `PassResult(False)` in every case, including when placeholders
        were removed.
        """
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph

        # Collect first; erasing happens after the scan is complete.
        dead_placeholders = []
        for candidate in graph.nodes:
            if (
                _is_constant_placeholder(exported_program, candidate)
                and not candidate.users
            ):
                dead_placeholders.append(candidate)

        if len(dead_placeholders) == 0:
            return PassResult(False)

        removed_names = []
        for dead in dead_placeholders:
            removed_names.append(dead.name)
            _remove_constant_placeholder(exported_program, dead)

        # Rebuild the input specs so they list exactly the surviving
        # placeholders, in graph order.
        signature = exported_program.graph_signature
        spec_by_name = {}
        for spec in signature.input_specs:
            spec_by_name[spec.arg.name] = spec

        surviving_specs = []
        for remaining in graph.nodes:
            if remaining.op == "placeholder":
                surviving_specs.append(spec_by_name[remaining.name])
        signature.input_specs = surviving_specs

        graph.lint()
        graph_module.recompile()

        logger.debug(f"Unused constant placeholders are removed: {removed_names}")

        # Run only once.
        return PassResult(False)
@@ -13,9 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import copy
16
- from dataclasses import dataclass, field
17
16
  from typing import Any, Dict, Mapping, Optional, Tuple, Type
18
17
 
18
+ from tico.quantization.config.llama_attention import (
19
+ DEFAULT_EXECUTION_PROFILE,
20
+ ExecutionProfile,
21
+ normalize_execution_profile,
22
+ )
19
23
  from tico.quantization.config.ptq import PTQConfig
20
24
  from tico.quantization.config.utils import auto_qscheme_for
21
25
  from tico.quantization.wrapq.dtypes import DType
@@ -336,6 +340,7 @@ def build_llm_ptq_config(
336
340
  norm_weight_bits: Optional[int] = None,
337
341
  norm_weight_dtype: Optional[DType] = None,
338
342
  strict_wrap: bool = True,
343
+ profile: ExecutionProfile = DEFAULT_EXECUTION_PROFILE,
339
344
  ) -> PTQConfig:
340
345
  """
341
346
  Build a PTQConfig for an LLM using model-family-aware override generation.
@@ -363,9 +368,7 @@ def build_llm_ptq_config(
363
368
  explicit override.
364
369
  default_observer : Type[ObserverBase], default=MinMaxObserver
365
370
  Observer class to instantiate when no explicit observer is provided
366
- via overrides.
367
- This should be a subclass of `ObserverBase` (e.g., MinMaxObserver,
368
- EMAObserver). The class itself (not an instance) must be passed.
371
+ through overrides.
369
372
  linear_weight_bits : Optional[int], default=None
370
373
  Convenience bit-width for decoder-layer linear projection weights.
371
374
  Used only when `linear_weight_dtype` is not provided.
@@ -391,6 +394,12 @@ def build_llm_ptq_config(
391
394
  strict_wrap : bool, default=True
392
395
  If True, preparing a model will raise when a required module cannot be
393
396
  wrapped.
397
+ profile : ExecutionProfile, default="npu_export"
398
+ Execution profile stored as `PTQConfig.model_args["profile"]`.
399
+ "reference_eval" selects a GPU-friendly, Hugging Face-like path.
400
+ "npu_export" preserves the existing NPU-export-oriented graph.
401
+ Advanced users may override or extend `qcfg.model_args` directly
402
+ before calling `prepare()`.
394
403
 
395
404
  Returns
396
405
  -------
@@ -402,6 +411,11 @@ def build_llm_ptq_config(
402
411
  NotImplementedError
403
412
  If the requested `model_type` is not supported.
404
413
  """
414
+ profile = normalize_execution_profile(
415
+ profile,
416
+ context="build_llm_ptq_config.profile",
417
+ )
418
+
405
419
  resolved_linear_weight_dtype = _resolve_weight_dtype(
406
420
  dtype=linear_weight_dtype,
407
421
  bits=linear_weight_bits,
@@ -438,6 +452,7 @@ def build_llm_ptq_config(
438
452
  default_qscheme=default_qscheme,
439
453
  default_observer=default_observer,
440
454
  overrides=overrides,
455
+ model_args={"profile": profile},
441
456
  strict_wrap=strict_wrap,
442
457
  )
443
458
 
@@ -448,7 +463,10 @@ def _build_qwen3_vl_norm_override(
448
463
  norm_weight_dtype: Optional[DType],
449
464
  ) -> Dict[str, Any]:
450
465
  """
451
- Build an override dictionary for Qwen3-VL norm modules (RMSNorm and LayerNorm).
466
+ Build an override dictionary for Qwen3-VL norm modules.
467
+
468
+ The generated override covers both RMSNorm-style observers used by text
469
+ modules and LayerNorm-style observers used by vision modules.
452
470
 
453
471
  Parameters
454
472
  ----------
@@ -0,0 +1,209 @@
1
+ # Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass, fields, replace
16
+ from typing import Any, cast, Literal, Mapping, Optional
17
+
18
+ from tico.quantization.config.ptq import PTQConfig
19
+
20
+
21
# Closed sets of string values accepted by the Llama attention configuration.
ExecutionProfile = Literal["reference_eval", "npu_export"]
ScaleFusion = Literal["none", "q_proj", "k_proj"]
RopeConvention = Literal["hf", "pre_negated_sin"]
AttentionLayout = Literal["batched", "unrolled"]

# Profile assumed when a configuration does not name one.
DEFAULT_EXECUTION_PROFILE: ExecutionProfile = "npu_export"

# Every profile name that passes validation.
SUPPORTED_EXECUTION_PROFILES: tuple[ExecutionProfile, ...] = (
    "reference_eval",
    "npu_export",
)
31
+
32
+
33
@dataclass(frozen=True)
class LlamaAttentionOptions:
    """
    Immutable graph-level options for quantized Llama attention wrappers.

    The options select how the attention graph is implemented; they are not
    quantization policy. For that reason they are read from
    `PTQConfig.model_args` rather than from `PTQConfig.overrides`.

    Attributes
    ----------
    scale_fusion : ScaleFusion
        Placement of the attention scale `1 / sqrt(head_dim)`.
        "none" applies it to the logits at runtime; "q_proj" and
        "k_proj" fold it into the corresponding projection weights.
    rope : RopeConvention
        Sign convention of the rotary embedding. "hf" uses `rotate_half`
        as `(-x2, x1)` with normal sine values. "pre_negated_sin" expects
        the first half of the sine values to be pre-negated and uses
        `(x2, x1)` in the rotate-half operation.
    layout : AttentionLayout
        Layout of the attention implementation. "batched" is closer to the
        Hugging Face implementation and is preferable for GPU evaluation.
        "unrolled" preserves the NPU-export-friendly per-head loop.
    """

    # The defaults are the same values as the "npu_export" preset.
    scale_fusion: ScaleFusion = "k_proj"
    rope: RopeConvention = "pre_negated_sin"
    layout: AttentionLayout = "unrolled"
62
+
63
+
64
# Option bundle selected by each execution profile name.
_PRESETS: dict[ExecutionProfile, LlamaAttentionOptions] = dict(
    reference_eval=LlamaAttentionOptions(
        scale_fusion="none",
        rope="hf",
        layout="batched",
    ),
    npu_export=LlamaAttentionOptions(
        scale_fusion="k_proj",
        rope="pre_negated_sin",
        layout="unrolled",
    ),
)
76
+
77
+
78
def normalize_execution_profile(
    profile: Any,
    *,
    context: str = "profile",
) -> ExecutionProfile:
    """
    Check a user-supplied execution profile and hand it back unchanged.

    Parameters
    ----------
    profile : Any
        Value supplied by the user.
    context : str
        Human-readable location of the value, embedded in error messages.

    Returns
    -------
    ExecutionProfile
        The same value, now known to be a supported profile name.

    Raises
    ------
    TypeError
        If `profile` is not a string.
    ValueError
        If `profile` is a string but not one of the supported profiles.
    """
    if isinstance(profile, str):
        if profile in SUPPORTED_EXECUTION_PROFILES:
            # `cast` only informs the type checker; the value is returned as is.
            return cast(ExecutionProfile, profile)
        raise ValueError(
            f"Unsupported execution profile at {context}: {profile!r}. "
            f"Supported profiles: {list(SUPPORTED_EXECUTION_PROFILES)}."
        )

    raise TypeError(f"{context} must be a string, got {type(profile).__name__}.")
113
+
114
+
115
def get_llama_attention_options(
    qcfg: Optional[PTQConfig],
) -> LlamaAttentionOptions:
    """
    Resolve Llama attention implementation options from a PTQConfig.

    The root-level `model_args["profile"]` selects the default execution
    profile for all profile-aware wrappers. The attention wrapper may override
    that default through `model_args["attention"]`.

    Supported examples are::

        PTQConfig(..., model_args={"profile": "reference_eval"})

    and::

        PTQConfig(
            ...,
            model_args={
                "profile": "reference_eval",
                "attention": {
                    "layout": "unrolled",
                },
            },
        )

    `model_args["attention"]` may also be a plain profile string, for example
    "npu_export". When no option is provided, the default profile is
    "npu_export" to preserve the existing export-oriented graph.

    Parameters
    ----------
    qcfg : Optional[PTQConfig]
        PTQ configuration associated with the wrapper. `None` yields the
        preset of the default execution profile.

    Returns
    -------
    LlamaAttentionOptions
        Validated execution options: the preset of the selected profile with
        any individual options from `model_args["attention"]` applied on top.

    Raises
    ------
    TypeError
        If a profile value is not a string, or if `model_args["attention"]`
        is neither a mapping, a string, nor None.
    ValueError
        If a profile name is unsupported, if `model_args["attention"]`
        contains an unknown option, or if an option value is unsupported.
    """
    if qcfg is None:
        return _PRESETS[DEFAULT_EXECUTION_PROFILE]

    root_profile = normalize_execution_profile(
        qcfg.get_model_arg("profile", DEFAULT_EXECUTION_PROFILE),
        context="PTQConfig.model_args['profile']",
    )

    raw_attention = qcfg.get_model_arg("attention", {})
    if raw_attention is None:
        raw_attention = {}

    # Error messages must point at the location the user actually wrote: a
    # plain string lives at model_args['attention'], not under a nested key.
    profile_context = "PTQConfig.model_args['attention']['profile']"
    if isinstance(raw_attention, str):
        profile_context = "PTQConfig.model_args['attention']"
        raw_attention = {"profile": raw_attention}
    if not isinstance(raw_attention, Mapping):
        raise TypeError(
            "PTQConfig.model_args['attention'] must be a mapping, a string, or None."
        )

    # Work on a copy so that the caller's mapping is never mutated by `pop`.
    raw = dict(raw_attention)
    profile = normalize_execution_profile(
        raw.pop("profile", root_profile),
        context=profile_context,
    )

    valid_keys = {field.name for field in fields(LlamaAttentionOptions)}
    # Sort by string form so that unorderable or mixed-type keys are reported
    # through the ValueError below instead of failing inside `sorted`.
    unknown_keys = sorted(set(raw) - valid_keys, key=str)
    if unknown_keys:
        raise ValueError(f"Unknown Llama attention option(s): {unknown_keys}.")

    # Start from the profile preset and overlay the explicitly given options.
    options = replace(_PRESETS[profile], **raw)
    _validate_llama_attention_options(options)
    return options
187
+
188
+
189
def is_npu_export_attention_options(options: LlamaAttentionOptions) -> bool:
    """
    Tell whether `options` describe the NPU-export-friendly attention graph.

    That is the case only when the scale is fused into "k_proj", the rotary
    convention is "pre_negated_sin", and the layout is "unrolled".
    """
    selected = (options.scale_fusion, options.rope, options.layout)
    return selected == ("k_proj", "pre_negated_sin", "unrolled")
198
+
199
+
200
def _validate_llama_attention_options(options: LlamaAttentionOptions) -> None:
    """
    Reject a resolved LlamaAttentionOptions instance holding an unknown value.

    The fields are checked in the order `scale_fusion`, `rope`, `layout`, and
    a ValueError naming the first offending value is raised.
    """
    scale_fusion_ok = options.scale_fusion in ("none", "q_proj", "k_proj")
    if not scale_fusion_ok:
        raise ValueError(f"Unsupported scale_fusion: {options.scale_fusion!r}.")

    rope_ok = options.rope in ("hf", "pre_negated_sin")
    if not rope_ok:
        raise ValueError(f"Unsupported rope convention: {options.rope!r}.")

    layout_ok = options.layout in ("batched", "unrolled")
    if not layout_ok:
        raise ValueError(f"Unsupported attention layout: {options.layout!r}.")
@@ -0,0 +1,145 @@
1
+ # Copyright (c) 2025 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional, Tuple, TYPE_CHECKING
16
+
17
+ if TYPE_CHECKING:
18
+ import torch.fx
19
+
20
+ import torch
21
+ from torch.export import ExportedProgram
22
+
23
+ from tico.serialize.quant_param import QPARAM_KEY, QuantParam, to_qparam_dtype
24
+ from tico.utils import logging
25
+ from tico.utils.graph import add_placeholder, get_torch_param_value, is_torch_param
26
+ from tico.utils.passes import PassBase, PassResult
27
+ from tico.utils.trace_decorators import trace_graph_diff_on_pass
28
+ from tico.utils.validate_args_kwargs import Conv2DArgs, LinearArgs
29
+
30
+
31
def _get_input_weight_bias_for_bias_quantization(
    node: "torch.fx.Node",
) -> Optional[Tuple["torch.fx.Node", "torch.fx.Node", "torch.fx.Node"]]:
    """
    Extract the (input, weight, bias) nodes of an operator with a quantizable bias.

    Linear and the circle custom conv2d operators are recognized. The returned
    tuple follows the common bias quantization rule where the bias scale is
    computed from the input scale and the per-output-channel weight scale.

    Returns None when the operator is not recognized or when it has no bias.
    """

    target = node.target

    if target == torch.ops.aten.linear.default:
        parsed = LinearArgs(*node.args, **node.kwargs)
    elif target in (
        torch.ops.circle_custom.conv2d,
        torch.ops.circle_custom.conv2d.padding,
    ):
        parsed = Conv2DArgs(*node.args, **node.kwargs)
    else:
        return None

    # Both argument views expose the operands under the same attribute names.
    if parsed.bias is None:
        return None

    return parsed.input, parsed.weight, parsed.bias
57
+
58
+
59
@trace_graph_diff_on_pass
class QuantizeBias(PassBase):
    """
    Quantize bias.

    This pass identifies fp32 biases of linear and circle conv2d operators and
    quantizes them using the scales of the operator's input and weights.

    This pass assumes that if bias is fp32, input and weights must have been
    quantized. Operators whose input or weights carry no quantization
    parameters are left untouched.
    """

    def __init__(self):
        super().__init__()

    def call(self, exported_program: ExportedProgram) -> PassResult:
        """
        Replace each eligible fp32 bias with a quantized placeholder.

        The quantized bias uses `input_scale * weight_scale` as its scale and
        a zero point of 0 for every element. It is stored as int64 when the
        input is quantized to "int16" and as int32 when the input is
        quantized to "uint8"; other input dtypes are skipped.

        Returns `PassResult(False)` in every case, including when biases were
        quantized.
        """
        logger = logging.getLogger(__name__)

        graph_module = exported_program.graph_module
        graph: torch.fx.Graph = graph_module.graph
        for node in graph.nodes:
            if node.op != "call_function":
                continue

            op_args = _get_input_weight_bias_for_bias_quantization(node)
            if op_args is None:
                continue

            inp, weights, bias = op_args

            # Only support bias is Parameter.
            # TODO Is it possible that bias is not Parameter?
            if not is_torch_param(bias, exported_program):
                continue

            bias_val: torch.Tensor = get_torch_param_value(bias, exported_program)
            if bias_val.dtype != torch.float32:
                continue

            # Both operands must already be quantized to derive the bias scale.
            if QPARAM_KEY not in inp.meta:
                continue

            if QPARAM_KEY not in weights.meta:
                continue

            # The bias storage type is chosen from the input's quantized dtype.
            input_dtype = inp.meta[QPARAM_KEY].dtype
            if input_dtype == "int16":
                quant_dtype = torch.int64
            elif input_dtype == "uint8":
                quant_dtype = torch.int32
            else:
                continue

            type_info = torch.iinfo(quant_dtype)

            i_scale = inp.meta[QPARAM_KEY].scale
            w_scale = weights.meta[QPARAM_KEY].scale

            # A single input scale and one weight scale per bias element are
            # required.
            assert i_scale is not None
            assert w_scale is not None
            assert len(i_scale) == 1
            assert len(w_scale) == bias_val.shape[0]

            bias_scale = torch.tensor(i_scale) * torch.tensor(w_scale)
            q_bias = torch.round(bias_val / bias_scale)
            q_bias = torch.clamp(q_bias, min=type_info.min, max=type_info.max)
            q_bias = q_bias.to(quant_dtype)

            q_bias_node = add_placeholder(exported_program, q_bias, bias.name)

            qparam = QuantParam()
            qparam.scale = bias_scale.tolist()
            assert qparam.scale is not None
            qparam.zero_point = [0] * len(qparam.scale)
            qparam.dtype = to_qparam_dtype(quant_dtype)
            qparam.quantized_dimension = 0
            q_bias_node.meta[QPARAM_KEY] = qparam

            # The operands are parsed from both positional and keyword
            # arguments, so the bias must be replaced wherever it was passed.
            if "bias" in node.kwargs:
                node.update_kwarg("bias", q_bias_node)
            else:
                node.update_arg(2, q_bias_node)

            logger.debug(f"Bias ({bias.name}) is quantized to {q_bias_node.name}.")

        graph.eliminate_dead_code()
        graph.lint()
        graph_module.recompile()

        # Run only once.
        return PassResult(False)