tico 0.2.0.dev260411__tar.gz → 0.2.0.dev260415__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333)
  1. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/PKG-INFO +1 -1
  2. tico-0.2.0.dev260415/tico/_version.py +1 -0
  3. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/quantizer.py +25 -3
  4. tico-0.2.0.dev260415/tico/quantization/wrapq/examples/evaluate_fk_llama_model.py +156 -0
  5. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +58 -10
  6. tico-0.2.0.dev260415/tico/quantization/wrapq/examples/qwen/trace_qwen.py +1159 -0
  7. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_model.py +66 -6
  8. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/utils.py +21 -0
  9. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/PKG-INFO +1 -1
  10. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/SOURCES.txt +2 -0
  11. tico-0.2.0.dev260411/tico/_version.py +0 -1
  12. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/LICENSE +0 -0
  13. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/README.md +0 -0
  14. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/pyproject.toml +0 -0
  15. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/setup.cfg +0 -0
  16. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/__init__.py +0 -0
  17. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/config/__init__.py +0 -0
  18. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/config/base.py +0 -0
  19. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/config/factory.py +0 -0
  20. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/config/v1.py +0 -0
  21. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/experimental/__init__.py +0 -0
  22. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/interpreter/__init__.py +0 -0
  23. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/interpreter/infer.py +0 -0
  24. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/interpreter/interpreter.py +0 -0
  25. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/__init__.py +0 -0
  26. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/cast_aten_where_arg_type.py +0 -0
  27. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
  28. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/cast_mixed_type_args.py +0 -0
  29. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/const_prop_pass.py +0 -0
  30. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
  31. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
  32. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_expand_to_slice_cat.py +0 -0
  33. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_layout_op_to_reshape.py +0 -0
  34. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_matmul_to_linear.py +0 -0
  35. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
  36. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
  37. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/convert_to_relu6.py +0 -0
  38. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_addmm.py +0 -0
  39. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_batch_norm.py +0 -0
  40. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_fake_quantize.py +0 -0
  41. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
  42. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_group_norm.py +0 -0
  43. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_grouped_conv2d.py +0 -0
  44. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/decompose_slice_scatter.py +0 -0
  45. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/extract_dtype_kwargs.py +0 -0
  46. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/fill_meta_val.py +0 -0
  47. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
  48. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
  49. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/legalize_causal_mask_value.py +0 -0
  50. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/legalize_predefined_layout_operators.py +0 -0
  51. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/lower_copy.py +0 -0
  52. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/lower_pow2_to_mul.py +0 -0
  53. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
  54. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/lower_to_slice.py +0 -0
  55. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/merge_consecutive_cat.py +0 -0
  56. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/ops.py +0 -0
  57. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_nop.py +0 -0
  58. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_assert_nodes.py +0 -0
  59. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_expand.py +0 -0
  60. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_permute.py +0 -0
  61. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_reshape.py +0 -0
  62. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_slice.py +0 -0
  63. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/remove_redundant_to_copy.py +0 -0
  64. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/restore_linear.py +0 -0
  65. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/passes/segment_index_select.py +0 -0
  66. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/pt2_to_circle.py +0 -0
  67. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/__init__.py +0 -0
  68. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/__init__.py +0 -0
  69. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
  70. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
  71. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
  72. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/__init__.py +0 -0
  73. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/gptq.py +0 -0
  74. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/quant.py +0 -0
  75. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/gptq/utils.py +0 -0
  76. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
  77. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
  78. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
  79. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
  80. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
  81. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
  82. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
  83. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
  84. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/__init__.py +0 -0
  85. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/fuse_norm_utils.py +0 -0
  86. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/hadamard_utils.py +0 -0
  87. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/quantizer.py +0 -0
  88. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/rotation_utils.py +0 -0
  89. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/algorithm/spinquant/spin_llama.py +0 -0
  90. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/__init__.py +0 -0
  91. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/base.py +0 -0
  92. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/builders.py +0 -0
  93. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/fpi_gptq.py +0 -0
  94. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/gptq.py +0 -0
  95. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/ptq.py +0 -0
  96. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
  97. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/smoothquant.py +0 -0
  98. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/spinquant.py +0 -0
  99. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/config/utils.py +0 -0
  100. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/__init__.py +0 -0
  101. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/backend.py +0 -0
  102. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/evaluate.py +0 -0
  103. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/__init__.py +0 -0
  104. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
  105. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
  106. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
  107. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/metric.py +0 -0
  108. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
  109. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
  110. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/utils.py +0 -0
  111. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
  112. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/__init__.py +0 -0
  113. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/fold_quant_ops.py +0 -0
  114. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  115. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
  116. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
  117. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/quantize_bias.py +0 -0
  118. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
  119. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/public_interface.py +0 -0
  120. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/quantizer.py +0 -0
  121. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/quantizer_registry.py +0 -0
  122. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/__init__.py +0 -0
  123. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/dtypes.py +0 -0
  124. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/__init__.py +0 -0
  125. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
  126. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
  127. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
  128. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
  129. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
  130. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
  131. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
  132. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
  133. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
  134. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
  135. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
  136. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
  137. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
  138. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
  139. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
  140. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
  141. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_for_conditional_generation.py +0 -0
  142. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_model.py +0 -0
  143. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
  144. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
  145. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
  146. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
  147. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
  148. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
  149. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
  150. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
  151. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
  152. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
  153. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +0 -0
  154. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/mode.py +0 -0
  155. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/__init__.py +0 -0
  156. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/affine_base.py +0 -0
  157. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/base.py +0 -0
  158. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/ema.py +0 -0
  159. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/identity.py +0 -0
  160. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/minmax.py +0 -0
  161. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/observers/mx.py +0 -0
  162. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/qscheme.py +0 -0
  163. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/quantizer.py +0 -0
  164. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/__init__.py +0 -0
  165. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
  166. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/introspection.py +0 -0
  167. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/metrics.py +0 -0
  168. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
  169. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/utils/version.py +0 -0
  170. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrap_helper.py +0 -0
  171. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
  172. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
  173. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
  174. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
  175. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
  176. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
  177. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
  178. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
  179. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
  180. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/export_adapters.py +0 -0
  181. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_attention.py +0 -0
  182. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer.py +0 -0
  183. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
  184. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
  185. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
  186. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
  187. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
  188. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
  189. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
  190. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
  191. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
  192. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
  193. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
  194. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
  195. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
  196. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
  197. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
  198. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_for_conditional_generation.py +0 -0
  199. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +0 -0
  200. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
  201. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
  202. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
  203. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
  204. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
  205. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
  206. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
  207. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
  208. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
  209. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/quantization/wrapq/wrappers/registry.py +0 -0
  210. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/__init__.py +0 -0
  211. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/circle_graph.py +0 -0
  212. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/circle_mapping.py +0 -0
  213. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/circle_serializer.py +0 -0
  214. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/__init__.py +0 -0
  215. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/__init__.py +0 -0
  216. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
  217. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
  218. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
  219. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/hashable_opcode.py +0 -0
  220. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/node_visitor.py +0 -0
  221. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_abs.py +0 -0
  222. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_add.py +0 -0
  223. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_alias_copy.py +0 -0
  224. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_any.py +0 -0
  225. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_arange_start_step.py +0 -0
  226. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_argmax.py +0 -0
  227. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_attention.py +0 -0
  228. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_avg_pool2d.py +0 -0
  229. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_bmm.py +0 -0
  230. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cat.py +0 -0
  231. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_circle_shape.py +0 -0
  232. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_clamp.py +0 -0
  233. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_clone.py +0 -0
  234. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
  235. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_conv2d.py +0 -0
  236. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cos.py +0 -0
  237. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_cumsum.py +0 -0
  238. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
  239. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
  240. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
  241. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_div.py +0 -0
  242. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_embedding.py +0 -0
  243. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_eq.py +0 -0
  244. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_exp.py +0 -0
  245. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_expand.py +0 -0
  246. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_full.py +0 -0
  247. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_full_like.py +0 -0
  248. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_ge.py +0 -0
  249. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_gelu.py +0 -0
  250. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_gt.py +0 -0
  251. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_index.py +0 -0
  252. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_index_select.py +0 -0
  253. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_instance_norm.py +0 -0
  254. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_le.py +0 -0
  255. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_leaky_relu.py +0 -0
  256. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_linear.py +0 -0
  257. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_log.py +0 -0
  258. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_log1p.py +0 -0
  259. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_logical_and.py +0 -0
  260. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_logical_not.py +0 -0
  261. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_lt.py +0 -0
  262. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_max_dim.py +0 -0
  263. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
  264. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_maximum.py +0 -0
  265. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mean.py +0 -0
  266. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_minimum.py +0 -0
  267. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mm.py +0 -0
  268. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_mul.py +0 -0
  269. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_ne.py +0 -0
  270. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_neg.py +0 -0
  271. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_permute.py +0 -0
  272. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_pow.py +0 -0
  273. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_prelu.py +0 -0
  274. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
  275. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_reciprocal.py +0 -0
  276. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_relu.py +0 -0
  277. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_relu6.py +0 -0
  278. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_repeat.py +0 -0
  279. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_reshape.py +0 -0
  280. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
  281. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_rmsnorm.py +0 -0
  282. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_round.py +0 -0
  283. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_rsqrt.py +0 -0
  284. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_scalar_tensor.py +0 -0
  285. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_select_copy.py +0 -0
  286. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sigmoid.py +0 -0
  287. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sin.py +0 -0
  288. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_slice.py +0 -0
  289. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_softmax.py +0 -0
  290. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_split_with_sizes.py +0 -0
  291. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sqrt.py +0 -0
  292. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_squeeze.py +0 -0
  293. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sub.py +0 -0
  294. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_sum.py +0 -0
  295. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_tanh.py +0 -0
  296. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_to_copy.py +0 -0
  297. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_transpose_conv.py +0 -0
  298. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_unsqueeze.py +0 -0
  299. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_view.py +0 -0
  300. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/op_where.py +0 -0
  301. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/operators/utils.py +0 -0
  302. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/pack.py +0 -0
  303. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/serialize/quant_param.py +0 -0
  304. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/__init__.py +0 -0
  305. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/compat/__init__.py +0 -0
  306. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/compat/torch.py +0 -0
  307. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/compat/transformers.py +0 -0
  308. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/convert.py +0 -0
  309. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/define.py +0 -0
  310. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/diff_graph.py +0 -0
  311. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/dtype.py +0 -0
  312. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/errors.py +0 -0
  313. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/graph.py +0 -0
  314. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/installed_packages.py +0 -0
  315. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/logging.py +0 -0
  316. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/model.py +0 -0
  317. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/mx/__init__.py +0 -0
  318. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/mx/elemwise_ops.py +0 -0
  319. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/mx/formats.py +0 -0
  320. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/mx/mx_ops.py +0 -0
  321. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/padding.py +0 -0
  322. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/passes.py +0 -0
  323. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/pytree_utils.py +0 -0
  324. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/record_input.py +0 -0
  325. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/register_custom_op.py +0 -0
  326. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/serialize.py +0 -0
  327. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/signature.py +0 -0
  328. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/trace_decorators.py +0 -0
  329. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico/utils/validate_args_kwargs.py +0 -0
  330. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/dependency_links.txt +0 -0
  331. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/entry_points.txt +0 -0
  332. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/requires.txt +0 -0
  333. {tico-0.2.0.dev260411 → tico-0.2.0.dev260415}/tico.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tico
3
- Version: 0.2.0.dev260411
3
+ Version: 0.2.0.dev260415
4
4
  Summary: Convert Exported Torch Module To Circle
5
5
  License: This file provides full text of licenses used in this project
6
6
 
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0.dev260415"
@@ -28,6 +28,11 @@ from tico.quantization.algorithm.gptq.utils import (
28
28
  from tico.quantization.config.gptq import GPTQConfig
29
29
  from tico.quantization.quantizer import BaseQuantizer
30
30
  from tico.quantization.quantizer_registry import register_quantizer
31
+ from tico.utils.utils import move_to_device
32
+
33
+
34
+ def move_to_cpu(obj):
35
+ return move_to_device(obj, "cpu")
31
36
 
32
37
 
33
38
  class StopForward(Exception):
@@ -118,12 +123,12 @@ class GPTQQuantizer(BaseQuantizer):
118
123
  for idx, item in enumerate(args):
119
124
  if (idx + 1) > len(self.cache_args):
120
125
  self.cache_args.append([])
121
- self.cache_args[idx].append(item)
126
+ self.cache_args[idx].append(move_to_cpu(item))
122
127
  # Store keyword args
123
128
  for k, v in kwargs.items():
124
129
  if self.cache_kwargs.get(k, None) is None:
125
130
  self.cache_kwargs[k] = []
126
- self.cache_kwargs[k].append(v)
131
+ self.cache_kwargs[k].append(move_to_cpu(v))
127
132
 
128
133
  self.num_batches += 1
129
134
  raise StopForward # stop after the first layer
@@ -280,6 +285,7 @@ class GPTQQuantizer(BaseQuantizer):
280
285
 
281
286
  # Run layer forward over all cached batches to build Hessian/statistics
282
287
  batch_num = self.num_batches
288
+ device = next(model.parameters()).device
283
289
  for batch_idx in tqdm(
284
290
  range(batch_num),
285
291
  desc=f"[L{l_idx}] collecting",
@@ -290,9 +296,13 @@ class GPTQQuantizer(BaseQuantizer):
290
296
  cache_args_batch = gather_single_batch_from_list(
291
297
  self.cache_args, batch_idx
292
298
  )
299
+ cache_args_batch = move_to_device(cache_args_batch, device)
300
+
293
301
  cache_kwargs_batch = gather_single_batch_from_dict(
294
302
  self.cache_kwargs, batch_idx
295
303
  )
304
+ cache_kwargs_batch = move_to_device(cache_kwargs_batch, device)
305
+
296
306
  layer(*cache_args_batch, **cache_kwargs_batch)
297
307
 
298
308
  # Remove handles
@@ -314,6 +324,7 @@ class GPTQQuantizer(BaseQuantizer):
314
324
  gptq[name].free()
315
325
 
316
326
  # 4) After quantization, re-run the layer to produce outputs for the next layer
327
+ device = next(model.parameters()).device
317
328
  for batch_idx in tqdm(
318
329
  range(batch_num),
319
330
  desc=f"[L{l_idx}] re-forward",
@@ -324,9 +335,13 @@ class GPTQQuantizer(BaseQuantizer):
324
335
  cache_args_batch = gather_single_batch_from_list(
325
336
  self.cache_args, batch_idx
326
337
  )
338
+ cache_args_batch = move_to_device(cache_args_batch, device)
339
+
327
340
  cache_kwargs_batch = gather_single_batch_from_dict(
328
341
  self.cache_kwargs, batch_idx
329
342
  )
343
+ cache_kwargs_batch = move_to_device(cache_kwargs_batch, device)
344
+
330
345
  outs = layer(*cache_args_batch, **cache_kwargs_batch)
331
346
  # LLaMA's decoder layer return type differs across Transformers versions:
332
347
  # some return a tuple (hidden_states, ...), others return just a tensor.
@@ -334,7 +349,14 @@ class GPTQQuantizer(BaseQuantizer):
334
349
  outs = outs[0] if isinstance(outs, tuple) else outs
335
350
  # Update inputs for next iteration.
336
351
  if len(self.cache_args) > 0:
337
- self.cache_args[0][batch_idx] = outs
352
+ if hasattr(outs, "to") and hasattr(
353
+ self.cache_args[0][batch_idx], "device"
354
+ ):
355
+ self.cache_args[0][batch_idx] = outs.to(
356
+ self.cache_args[0][batch_idx].device
357
+ )
358
+ else:
359
+ self.cache_args[0][batch_idx] = outs
338
360
 
339
361
  if torch.cuda.is_available():
340
362
  torch.cuda.empty_cache()
@@ -0,0 +1,156 @@
1
+ # Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import argparse
16
+
17
+ import torch
18
+
19
+ from lm_eval.utils import make_table
20
+
21
+ from transformers import AutoModelForCausalLM, AutoTokenizer
22
+
23
+ from tico.quantization.evaluation.script.llm_tasks_eval import evaluate_llm_on_tasks
24
+
25
+ DTYPE_MAP = {
26
+ "float32": torch.float32,
27
+ # TODO Support more dtypes
28
+ # "bfloat16": torch.bfloat16,
29
+ # "float16": torch.float16,
30
+ }
31
+
32
+
33
+ def main():
34
+ parser = argparse.ArgumentParser(
35
+ description="Evaluate a fake-quantized Llama model"
36
+ )
37
+ parser.add_argument(
38
+ "--model", type=str, required=True, help="HF repo name or local path."
39
+ )
40
+ parser.add_argument(
41
+ "--device",
42
+ type=str,
43
+ default="cuda" if torch.cuda.is_available() else "cpu",
44
+ help="Device to run on (cuda|cpu|mps).",
45
+ )
46
+ parser.add_argument(
47
+ "--dtype",
48
+ choices=list(DTYPE_MAP.keys()),
49
+ default="float32",
50
+ help="Model dtype for load.",
51
+ )
52
+ parser.add_argument(
53
+ "--hf-token",
54
+ type=str,
55
+ default=None,
56
+ help="Optional HF token for gated/private repos.",
57
+ )
58
+ parser.add_argument(
59
+ "--trust-remote-code",
60
+ action="store_true",
61
+ help="Enable only if you trust the model repo code.",
62
+ )
63
+ parser.add_argument(
64
+ "--cache_dir",
65
+ type=str,
66
+ default=None,
67
+ help="cache_dir for using model/datasets loading",
68
+ )
69
+ parser.add_argument(
70
+ "--fk_model_path", type=str, required=True, help="Path to fake_quantized model"
71
+ )
72
+ parser.add_argument(
73
+ "--eval_tasks",
74
+ type=str,
75
+ default=None,
76
+ help="tasks to be evaluated using lm_eval, e.g. `winogrande,arc_easy,arc_challenge,openbookqa,mmlu_pro,ifeval,bbh`",
77
+ )
78
+ parser.add_argument(
79
+ "--skip_fp_eval",
80
+ action="store_true",
81
+ help="Skip original model evaluation.",
82
+ )
83
+
84
+ args = parser.parse_args()
85
+ print(args)
86
+
87
+ # -------------------------------------------------------------------------
88
+ # Basic setup
89
+ # -------------------------------------------------------------------------
90
+ device = torch.device(args.device)
91
+ dtype = DTYPE_MAP[args.dtype]
92
+
93
+ print("=== Config ===")
94
+ print(f"Model : {args.model}")
95
+ print(f"Device : {device.type}")
96
+ print(f"DType : {args.dtype}")
97
+ print(f"fk_model_path : {args.fk_model_path}")
98
+ print()
99
+
100
+ tokenizer = AutoTokenizer.from_pretrained(
101
+ args.model,
102
+ trust_remote_code=args.trust_remote_code,
103
+ token=args.hf_token,
104
+ cache_dir=args.cache_dir,
105
+ )
106
+
107
+ if not args.skip_fp_eval:
108
+
109
+ # -------------------------------------------------------------------------
110
+ # FP model evaluation
111
+ # -------------------------------------------------------------------------
112
+ print("Loading FP model …")
113
+ model = (
114
+ AutoModelForCausalLM.from_pretrained(
115
+ args.model,
116
+ dtype=dtype,
117
+ trust_remote_code=args.trust_remote_code,
118
+ token=args.hf_token,
119
+ cache_dir=args.cache_dir,
120
+ )
121
+ .cpu()
122
+ .eval()
123
+ )
124
+
125
+ if args.eval_tasks is not None:
126
+ config = model.config
127
+ max_seq_len = config.max_position_embeddings
128
+ results = evaluate_llm_on_tasks(
129
+ model, tokenizer, args.eval_tasks, max_length=max_seq_len
130
+ )
131
+ print("Original RESULTS ARE:")
132
+ print(make_table(results))
133
+
134
+ model = model.cpu()
135
+ if device.type == "cuda" and torch.cuda.is_available():
136
+ torch.cuda.empty_cache()
137
+
138
+ # -------------------------------------------------------------------------
139
+ # FK model evaluation
140
+ # -------------------------------------------------------------------------
141
+ print("Loading fake quantized model …")
142
+ fk_model = torch.load(args.fk_model_path, weights_only=False).eval().to(args.device)
143
+
144
+ if args.eval_tasks is not None:
145
+ config = fk_model.wrapped.config
146
+ max_seq_len = config.max_position_embeddings
147
+
148
+ results = evaluate_llm_on_tasks(
149
+ fk_model, tokenizer, args.eval_tasks, max_length=max_seq_len
150
+ )
151
+ print("Quantized RESULTS ARE:")
152
+ print(make_table(results))
153
+
154
+
155
+ if __name__ == "__main__":
156
+ main()
@@ -217,9 +217,43 @@ def evaluate(q_m, tokenizer, dataset_test, args):
217
217
  print(make_table(results))
218
218
 
219
219
 
220
+ def get_sensitivities_info_name(model, dataset, seed, n_samples):
221
+ model_name = model.config.name_or_path.replace("/", "_")
222
+
223
+ name = (
224
+ "."
225
+ + "/sensitivities_for_"
226
+ + model_name
227
+ + "_"
228
+ + dataset
229
+ + "_"
230
+ + str(n_samples)
231
+ + "_"
232
+ + str(seed)
233
+ + ".pt"
234
+ )
235
+ return name
236
+
237
+
238
+ def get_ptq_model_name(model, args):
239
+ model_name = model.config.name_or_path.replace("/", "_")
240
+
241
+ name = (
242
+ f"PTQ_{model_name}_"
243
+ + ("SpinQuant_" if args.no_spinquant is False else "")
244
+ + ("GPTQ_" if args.no_GPTQ is False else "")
245
+ + (f"{args.gptq_mse}_" if args.no_GPTQ is False else "")
246
+ + str(args.nsamples_for_qcalibration)
247
+ + "_"
248
+ + str(args.seed)
249
+ + ".pt"
250
+ )
251
+ return name
252
+
253
+
220
254
  def main():
221
255
  parser = argparse.ArgumentParser(
222
- description="GPTQ+PTQ pipeline (weight-only + activation)"
256
+ description="GPTQ+PTQ pipeline (weight-only + activation)",
223
257
  )
224
258
  parser.add_argument(
225
259
  "--model", type=str, required=True, help="HF repo name or local path."
@@ -270,16 +304,17 @@ def main():
270
304
  help="Leave model float",
271
305
  )
272
306
  parser.add_argument(
273
- "--save_circle_to_folder",
307
+ "--output_dir",
274
308
  type=str,
275
309
  default=None,
276
- help="Save the whole model to the folder specified",
310
+ help="Save specified artifacts to output_dir",
277
311
  )
278
312
  parser.add_argument(
279
- "--save_layers_to_folder",
313
+ "--save",
314
+ nargs="*",
280
315
  type=str,
281
- default=None,
282
- help="Save all layers to the folder specified",
316
+ choices=["circle_full", "circle_per_layer", "ptq_checkpoint", "sensitivity"],
317
+ help="which artifacts should be saved to output_dir",
283
318
  )
284
319
  parser.add_argument(
285
320
  "--cache_dir",
@@ -439,6 +474,13 @@ def main():
439
474
  else:
440
475
  calibrator = SensitivityCalibrator(model, calib_inputs)
441
476
  sens = calibrator.compute_sensitivity_info()
477
+ if args.output_dir is not None and "sensitivity" in args.save:
478
+ save_name = get_sensitivities_info_name(
479
+ model, "wikitext", args.seed, len(calib_inputs)
480
+ )
481
+ save_path = pathlib.Path(args.output_dir, save_name)
482
+ print(f"Saving calibrated_sensitivities to {save_path}")
483
+ torch.save(sens, save_path)
442
484
 
443
485
  gptq_config = GPTQConfig(
444
486
  weight_bits=args.linear_weight_bits,
@@ -461,15 +503,21 @@ def main():
461
503
  if not args.no_PTQ:
462
504
  q_m = quantize_using_PTQ(q_m, calib_inputs, args)
463
505
 
506
+ if args.output_dir is not None and "ptq_checkpoint" in args.save:
507
+ save_name = get_ptq_model_name(model, args)
508
+ save_path = pathlib.Path(args.output_dir, save_name)
509
+ print(f"Saving PTQ model to {save_path}")
510
+ torch.save(q_m, save_path)
511
+
464
512
  # after PTQ quantizer only fixed-length input sequences are valid
465
513
  evaluate(q_m, tokenizer, dataset_test, args)
466
514
 
467
- if args.save_layers_to_folder is not None:
468
- save_layers_to(q_m, args.max_seq_len, args.save_layers_to_folder)
515
+ if args.output_dir is not None and "circle_per_layer" in args.save:
516
+ save_layers_to(q_m, args.max_seq_len, args.output_dir)
469
517
 
470
- if args.save_circle_to_folder is not None:
518
+ if args.output_dir is not None and "circle_full" in args.save:
471
519
  calib_inputs = list(torch.stack(calib_inputs).reshape(-1, 1, args.max_seq_len))
472
- save_model_to(q_m, calib_inputs, args.save_circle_to_folder)
520
+ save_model_to(q_m, calib_inputs, args.output_dir)
473
521
 
474
522
 
475
523
  if __name__ == "__main__":