tico 0.2.0.dev260326__tar.gz → 0.2.0.dev260331__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320)
  1. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/PKG-INFO +1 -1
  2. tico-0.2.0.dev260331/tico/_version.py +1 -0
  3. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/builders.py +2 -1
  4. tico-0.2.0.dev260331/tico/quantization/wrapq/examples/static_llama_layer_runtime.py +604 -0
  5. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/PKG-INFO +1 -1
  6. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/SOURCES.txt +1 -0
  7. tico-0.2.0.dev260326/tico/_version.py +0 -1
  8. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/LICENSE +0 -0
  9. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/README.md +0 -0
  10. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/pyproject.toml +0 -0
  11. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/setup.cfg +0 -0
  12. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/__init__.py +0 -0
  13. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/config/__init__.py +0 -0
  14. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/config/base.py +0 -0
  15. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/config/factory.py +0 -0
  16. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/config/v1.py +0 -0
  17. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/experimental/__init__.py +0 -0
  18. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/interpreter/__init__.py +0 -0
  19. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/interpreter/infer.py +0 -0
  20. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/interpreter/interpreter.py +0 -0
  21. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/__init__.py +0 -0
  22. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/cast_aten_where_arg_type.py +0 -0
  23. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/cast_clamp_mixed_type_args.py +0 -0
  24. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/cast_mixed_type_args.py +0 -0
  25. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/const_prop_pass.py +0 -0
  26. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_conv1d_to_conv2d.py +0 -0
  27. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_conv3d_to_conv2d.py +0 -0
  28. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_expand_to_slice_cat.py +0 -0
  29. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_layout_op_to_reshape.py +0 -0
  30. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_matmul_to_linear.py +0 -0
  31. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_repeat_to_expand_copy.py +0 -0
  32. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_sym_size_to_circle_shape.py +0 -0
  33. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/convert_to_relu6.py +0 -0
  34. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_addmm.py +0 -0
  35. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_batch_norm.py +0 -0
  36. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_fake_quantize.py +0 -0
  37. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_fake_quantize_tensor_qparams.py +0 -0
  38. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_group_norm.py +0 -0
  39. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_grouped_conv2d.py +0 -0
  40. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/decompose_slice_scatter.py +0 -0
  41. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/extract_dtype_kwargs.py +0 -0
  42. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/fill_meta_val.py +0 -0
  43. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/fuse_leading_unsqueeze_reshape.py +0 -0
  44. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/fuse_redundant_reshape_to_mean.py +0 -0
  45. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/legalize_causal_mask_value.py +0 -0
  46. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/legalize_predefined_layout_operators.py +0 -0
  47. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/lower_copy.py +0 -0
  48. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/lower_pow2_to_mul.py +0 -0
  49. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/lower_to_resize_nearest_neighbor.py +0 -0
  50. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/lower_to_slice.py +0 -0
  51. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/merge_consecutive_cat.py +0 -0
  52. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/ops.py +0 -0
  53. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_nop.py +0 -0
  54. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_assert_nodes.py +0 -0
  55. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_expand.py +0 -0
  56. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_permute.py +0 -0
  57. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_reshape.py +0 -0
  58. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_slice.py +0 -0
  59. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/remove_redundant_to_copy.py +0 -0
  60. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/restore_linear.py +0 -0
  61. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/passes/segment_index_select.py +0 -0
  62. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/pt2_to_circle.py +0 -0
  63. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/__init__.py +0 -0
  64. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/__init__.py +0 -0
  65. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/fpi_gptq/__init__.py +0 -0
  66. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/fpi_gptq/fpi_gptq.py +0 -0
  67. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/fpi_gptq/quantizer.py +0 -0
  68. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/gptq/__init__.py +0 -0
  69. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/gptq/gptq.py +0 -0
  70. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/gptq/quant.py +0 -0
  71. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/gptq/quantizer.py +0 -0
  72. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/gptq/utils.py +0 -0
  73. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/qwen3_vl_gptq/__init__.py +0 -0
  74. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/qwen3_vl_gptq/gptq.py +0 -0
  75. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/qwen3_vl_gptq/quantizer.py +0 -0
  76. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/qwen3_vl_gptq/utils.py +0 -0
  77. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/smoothquant/__init__.py +0 -0
  78. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/smoothquant/observer.py +0 -0
  79. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/smoothquant/quantizer.py +0 -0
  80. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/algorithm/smoothquant/smooth_quant.py +0 -0
  81. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/__init__.py +0 -0
  82. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/base.py +0 -0
  83. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/fpi_gptq.py +0 -0
  84. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/gptq.py +0 -0
  85. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/ptq.py +0 -0
  86. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/qwen3_vl_gptq.py +0 -0
  87. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/config/smoothquant.py +0 -0
  88. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/__init__.py +0 -0
  89. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/backend.py +0 -0
  90. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/evaluate.py +0 -0
  91. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/executor/__init__.py +0 -0
  92. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/executor/backend_executor.py +0 -0
  93. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/executor/circle_executor.py +0 -0
  94. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/executor/triv24_executor.py +0 -0
  95. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/metric.py +0 -0
  96. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/script/llm_tasks_eval.py +0 -0
  97. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/script/mini_vqa_eval.py +0 -0
  98. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/utils.py +0 -0
  99. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/evaluation/vlm_eval_utils.py +0 -0
  100. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/__init__.py +0 -0
  101. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/fold_quant_ops.py +0 -0
  102. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  103. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/propagate_qparam_backward.py +0 -0
  104. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/propagate_qparam_forward.py +0 -0
  105. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/quantize_bias.py +0 -0
  106. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/passes/remove_weight_dequant_op.py +0 -0
  107. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/public_interface.py +0 -0
  108. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/quantizer.py +0 -0
  109. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/quantizer_registry.py +0 -0
  110. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/__init__.py +0 -0
  111. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/dtypes.py +0 -0
  112. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/__init__.py +0 -0
  113. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/compare_ppl.py +0 -0
  114. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/debug_quant_outputs.py +0 -0
  115. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/__init__.py +0 -0
  116. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/quantize_attn_decode.py +0 -0
  117. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py +0 -0
  118. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_decode.py +0 -0
  119. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/quantize_decoder_layer_prefill.py +0 -0
  120. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/llama/quantize_mlp.py +0 -0
  121. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/nn/__init__.py +0 -0
  122. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/nn/quantize_conv3d.py +0 -0
  123. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/nn/quantize_conv3d_special_case.py +0 -0
  124. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/nn/quantize_linear.py +0 -0
  125. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py +0 -0
  126. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py +0 -0
  127. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py +0 -0
  128. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/quantize_with_gptq.py +0 -0
  129. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/__init__.py +0 -0
  130. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_text_attn.py +0 -0
  131. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_text_decoder_layer.py +0 -0
  132. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_text_mlp.py +0 -0
  133. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_text_model.py +0 -0
  134. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_attn.py +0 -0
  135. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_block.py +0 -0
  136. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_mlp.py +0 -0
  137. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_model.py +0 -0
  138. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_embed.py +0 -0
  139. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/examples/qwen/quantize_vision_patch_merger.py +0 -0
  140. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/mode.py +0 -0
  141. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/__init__.py +0 -0
  142. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/affine_base.py +0 -0
  143. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/base.py +0 -0
  144. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/ema.py +0 -0
  145. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/identity.py +0 -0
  146. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/minmax.py +0 -0
  147. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/observers/mx.py +0 -0
  148. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/qscheme.py +0 -0
  149. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/quantizer.py +0 -0
  150. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/__init__.py +0 -0
  151. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/check_missing_qparam.py +0 -0
  152. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/introspection.py +0 -0
  153. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/metrics.py +0 -0
  154. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/reduce_utils.py +0 -0
  155. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/utils/version.py +0 -0
  156. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrap_helper.py +0 -0
  157. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/__init__.py +0 -0
  158. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/__init__.py +0 -0
  159. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/decoder_export_single_step.py +0 -0
  160. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder.py +0 -0
  161. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/quant_decoder_layer.py +0 -0
  162. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder.py +0 -0
  163. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/quant_encoder_layer.py +0 -0
  164. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/fairseq/quant_mha.py +0 -0
  165. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/__init__.py +0 -0
  166. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_attn_decode.py +0 -0
  167. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_attn_prefill.py +0 -0
  168. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer_decode.py +0 -0
  169. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_decoder_layer_prefill.py +0 -0
  170. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_mlp.py +0 -0
  171. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_model.py +0 -0
  172. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/llama/quant_model_for_causal_lm.py +0 -0
  173. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/__init__.py +0 -0
  174. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_conv3d.py +0 -0
  175. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_conv3d_decomposed.py +0 -0
  176. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_embedding.py +0 -0
  177. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_layernorm.py +0 -0
  178. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_linear.py +0 -0
  179. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/nn/quant_silu.py +0 -0
  180. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/ops/__init__.py +0 -0
  181. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/ops/quant_rmsnorm.py +0 -0
  182. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/ptq_wrapper.py +0 -0
  183. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/quant_elementwise.py +0 -0
  184. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/quant_module_base.py +0 -0
  185. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_attn.py +0 -0
  186. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_decoder_layer.py +0 -0
  187. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_mlp.py +0 -0
  188. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_text_model.py +0 -0
  189. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_attn.py +0 -0
  190. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_block.py +0 -0
  191. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_mlp.py +0 -0
  192. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_model.py +0 -0
  193. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_embed.py +0 -0
  194. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/qwen_vl/quant_vision_patch_merger.py +0 -0
  195. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/quantization/wrapq/wrappers/registry.py +0 -0
  196. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/__init__.py +0 -0
  197. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/circle_graph.py +0 -0
  198. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/circle_mapping.py +0 -0
  199. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/circle_serializer.py +0 -0
  200. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/__init__.py +0 -0
  201. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/adapters/__init__.py +0 -0
  202. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/adapters/llama_rmsnorm.py +0 -0
  203. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/adapters/onert/__init__.py +0 -0
  204. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/adapters/onert/llama_attention.py +0 -0
  205. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/hashable_opcode.py +0 -0
  206. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/node_visitor.py +0 -0
  207. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_abs.py +0 -0
  208. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_add.py +0 -0
  209. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_alias_copy.py +0 -0
  210. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_any.py +0 -0
  211. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_arange_start_step.py +0 -0
  212. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_argmax.py +0 -0
  213. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_attention.py +0 -0
  214. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_avg_pool2d.py +0 -0
  215. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_bmm.py +0 -0
  216. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_cat.py +0 -0
  217. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_circle_shape.py +0 -0
  218. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_clamp.py +0 -0
  219. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_clone.py +0 -0
  220. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_constant_pad_nd.py +0 -0
  221. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_conv2d.py +0 -0
  222. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_cos.py +0 -0
  223. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_cumsum.py +0 -0
  224. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_depthwise_conv2d.py +0 -0
  225. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_dequantize_per_channel.py +0 -0
  226. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_dequantize_per_tensor.py +0 -0
  227. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_div.py +0 -0
  228. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_embedding.py +0 -0
  229. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_eq.py +0 -0
  230. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_exp.py +0 -0
  231. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_expand.py +0 -0
  232. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_full.py +0 -0
  233. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_full_like.py +0 -0
  234. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_ge.py +0 -0
  235. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_gelu.py +0 -0
  236. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_gt.py +0 -0
  237. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_index.py +0 -0
  238. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_index_select.py +0 -0
  239. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_instance_norm.py +0 -0
  240. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_le.py +0 -0
  241. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_leaky_relu.py +0 -0
  242. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_linear.py +0 -0
  243. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_log.py +0 -0
  244. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_log1p.py +0 -0
  245. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_logical_and.py +0 -0
  246. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_logical_not.py +0 -0
  247. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_lt.py +0 -0
  248. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_max_dim.py +0 -0
  249. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_max_pool2d_with_indices.py +0 -0
  250. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_maximum.py +0 -0
  251. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_mean.py +0 -0
  252. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_minimum.py +0 -0
  253. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_mm.py +0 -0
  254. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_mul.py +0 -0
  255. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_ne.py +0 -0
  256. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_neg.py +0 -0
  257. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_permute.py +0 -0
  258. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_pow.py +0 -0
  259. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_prelu.py +0 -0
  260. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_quantize_per_tensor.py +0 -0
  261. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_reciprocal.py +0 -0
  262. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_relu.py +0 -0
  263. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_relu6.py +0 -0
  264. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_repeat.py +0 -0
  265. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_reshape.py +0 -0
  266. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_resize_nearest_neighbor.py +0 -0
  267. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_rmsnorm.py +0 -0
  268. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_round.py +0 -0
  269. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_rsqrt.py +0 -0
  270. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_scalar_tensor.py +0 -0
  271. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_select_copy.py +0 -0
  272. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_sigmoid.py +0 -0
  273. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_sin.py +0 -0
  274. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_slice.py +0 -0
  275. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_softmax.py +0 -0
  276. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_split_with_sizes.py +0 -0
  277. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_sqrt.py +0 -0
  278. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_squeeze.py +0 -0
  279. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_sub.py +0 -0
  280. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_sum.py +0 -0
  281. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_tanh.py +0 -0
  282. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_to_copy.py +0 -0
  283. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_transpose_conv.py +0 -0
  284. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_unsqueeze.py +0 -0
  285. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_view.py +0 -0
  286. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/op_where.py +0 -0
  287. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/operators/utils.py +0 -0
  288. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/pack.py +0 -0
  289. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/serialize/quant_param.py +0 -0
  290. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/__init__.py +0 -0
  291. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/compat/__init__.py +0 -0
  292. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/compat/torch.py +0 -0
  293. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/compat/transformers.py +0 -0
  294. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/convert.py +0 -0
  295. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/define.py +0 -0
  296. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/diff_graph.py +0 -0
  297. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/dtype.py +0 -0
  298. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/errors.py +0 -0
  299. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/graph.py +0 -0
  300. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/installed_packages.py +0 -0
  301. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/logging.py +0 -0
  302. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/model.py +0 -0
  303. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/mx/__init__.py +0 -0
  304. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/mx/elemwise_ops.py +0 -0
  305. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/mx/formats.py +0 -0
  306. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/mx/mx_ops.py +0 -0
  307. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/padding.py +0 -0
  308. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/passes.py +0 -0
  309. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/pytree_utils.py +0 -0
  310. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/record_input.py +0 -0
  311. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/register_custom_op.py +0 -0
  312. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/serialize.py +0 -0
  313. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/signature.py +0 -0
  314. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/trace_decorators.py +0 -0
  315. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/utils.py +0 -0
  316. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico/utils/validate_args_kwargs.py +0 -0
  317. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/dependency_links.txt +0 -0
  318. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/entry_points.txt +0 -0
  319. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/requires.txt +0 -0
  320. {tico-0.2.0.dev260326 → tico-0.2.0.dev260331}/tico.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tico
3
- Version: 0.2.0.dev260326
3
+ Version: 0.2.0.dev260331
4
4
  Summary: Convert Exported Torch Module To Circle
5
5
  License: This file provides full text of licenses used in this project
6
6
 
@@ -0,0 +1 @@
1
+ __version__ = "0.2.0.dev260331"
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import copy
15
16
  from typing import Any, Dict, Optional, Tuple
16
17
 
17
18
  from tico.quantization.config.ptq import PTQConfig, WrapperVariant
@@ -113,7 +114,7 @@ def _set_nested_override(
113
114
  current = root
114
115
  for key in path[:-1]:
115
116
  current = current.setdefault(key, {})
116
- current[path[-1]] = value
117
+ current[path[-1]] = copy.deepcopy(value)
117
118
 
118
119
 
119
120
  def _build_weight_override(weight_dtype: Optional[DType]) -> Dict[str, Any]:
@@ -0,0 +1,604 @@
1
+ # Copyright (c) 2026 Samsung Electronics Co., Ltd. All Rights Reserved
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import argparse
16
+ import copy
17
+ from dataclasses import dataclass
18
+ from typing import List, Literal, Optional, Sequence, Tuple
19
+
20
+ import torch
21
+ import torch.nn as nn
22
+ from transformers import AutoModelForCausalLM, AutoTokenizer
23
+
24
+ from tico.quantization import prepare
25
+ from tico.quantization.config.ptq import PTQConfig
26
+ from tico.quantization.evaluation.metric import compute_peir
27
+ from tico.quantization.wrapq.wrappers.llama.quant_decoder_layer_decode import (
28
+ QuantLlamaDecoderLayerDecode,
29
+ )
30
+ from tico.quantization.wrapq.wrappers.llama.quant_decoder_layer_prefill import (
31
+ QuantLlamaDecoderLayerPrefill,
32
+ )
33
+
34
+
35
@dataclass
class LayerCache:
    """Pair of key/value cache tensors kept by the runtime for one decoder layer."""

    # Key tensor of the tokens that were already processed.
    past_k: torch.Tensor
    # Value tensor of the tokens that were already processed.
    past_v: torch.Tensor
39
+
40
+
41
def parse_args():
    """
    Parse the command-line options of the example script.

    Returns:
        The argparse namespace with the attributes `model`, `max_seq`,
        `device`, `prompt`, `verify_steps` and `gen_steps`.
    """
    cli = argparse.ArgumentParser(
        description="Static-shape Llama layer runtime with prefill/decode wrappers."
    )

    # (flag, value type, default, help text) -- registered in this order.
    option_table = (
        ("--model", str, "Maykeye/TinyLLama-v0", "HF model name or local model path."),
        (
            "--max-seq",
            int,
            256,
            "Static maximum sequence length for decode runtime.",
        ),
        ("--device", str, "cpu", "Execution device, e.g. cpu or cuda."),
        (
            "--prompt",
            str,
            "The capital of France is",
            "Prompt used for verification and greedy generation.",
        ),
        (
            "--verify-steps",
            int,
            6,
            "Number of decode steps for reference verification.",
        ),
        (
            "--gen-steps",
            int,
            16,
            "Maximum number of new tokens for greedy generation.",
        ),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag, type=value_type, default=default_value, help=help_text)

    return cli.parse_args()
82
+
83
+
84
def _clone_layer_with_variant(
    layer: nn.Module, variant: Literal["common", "prefill", "decode"]
) -> nn.Module:
    """
    Wrap a decoder layer for one runtime phase.

    `layer` is passed to `prepare` together with a `PTQConfig` whose
    `wrapper_variant` is `variant`. The result is expected to be a PTQWrapper
    whose `.wrapped` attribute is either a QuantLlamaDecoderLayerPrefill or a
    QuantLlamaDecoderLayerDecode.
    """
    phase_config = PTQConfig(wrapper_variant=variant)
    return prepare(layer, phase_config)
98
+
99
+
100
def _build_rope_templates_from_config(
    config,
    max_seq: int,
    device: torch.device,
    dtype: torch.dtype = torch.float32,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Build the full RoPE tables with the same simplified logic as the wrappers.

    The head dimension comes from `config.head_dim` when it is set and truthy,
    otherwise from `hidden_size // num_attention_heads`. The rotary base is
    read from `config.rope_parameters["rope_theta"]` when that dict entry
    exists, else from `config.rope_theta`, else it defaults to 10000.0.

    The first half of the sine table is stored negated.

    Output shapes:
        cos: (1, max_seq, head_dim)
        sin: (1, max_seq, head_dim)
    """
    head_dim = getattr(config, "head_dim", None)
    if not head_dim:
        head_dim = config.hidden_size // config.num_attention_heads

    rope_params = getattr(config, "rope_parameters", None)
    if isinstance(rope_params, dict) and "rope_theta" in rope_params:
        theta = float(rope_params["rope_theta"])
    else:
        theta = float(getattr(config, "rope_theta", 10000.0))

    # Frequencies are always computed in float32; the cast to `dtype` is last.
    exponents = (
        torch.arange(0, head_dim, 2, dtype=torch.float32, device=device) / head_dim
    )
    inv_freq = 1.0 / (theta**exponents)

    positions = torch.arange(max_seq, dtype=torch.float32, device=device)
    angles = torch.outer(positions, inv_freq)
    table = torch.cat([angles, angles], dim=-1)

    cos_table = table.cos()
    sin_table = table.sin()

    # Flip the sign of the first half of the sine table in place.
    sin_table[..., : head_dim // 2].neg_()

    return (
        cos_table.unsqueeze(0).to(dtype=dtype),
        sin_table.unsqueeze(0).to(dtype=dtype),
    )
145
+
146
+
147
def _slice_rope(
    rope_cos: torch.Tensor,
    rope_sin: torch.Tensor,
    position: int,
    batch_size: int,
    device: torch.device,
    dtype: torch.dtype,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Pick the RoPE entries of a single position for one decode step.

    Both tables are sliced at `position` along dim 1, moved to `device` /
    `dtype`, and expanded to `batch_size` along dim 0 when it is not 1.

    Output shapes:
        cos: (B, 1, head_dim)
        sin: (B, 1, head_dim)
    """
    window = slice(position, position + 1)
    step_cos, step_sin = (
        table[:, window, :].to(device=device, dtype=dtype)
        for table in (rope_cos, rope_sin)
    )

    if batch_size == 1:
        return step_cos, step_sin

    # Materialize one copy per batch entry instead of returning a stride-0 view.
    return (
        step_cos.expand(batch_size, -1, -1).contiguous(),
        step_sin.expand(batch_size, -1, -1).contiguous(),
    )
170
+
171
+
172
def _build_decode_attention_mask(
    batch_size: int,
    past_len: int,
    max_seq: int,
    device: torch.device,
    dtype: torch.dtype,
    mask_value: float = -120.0,
) -> torch.Tensor:
    """
    Build a fully static additive mask for one decode step.

    Layout assumption:
    - past KV occupies the first `past_len` slots inside the static past buffer
    - padded past slots are masked
    - current token is appended internally by the attention module at the last slot

    Returned shape:
        (B, 1, max_seq)

    Columns set to 0.0 (visible):
        [0, 1, ..., past_len - 1, max_seq - 1]
    Columns left at `mask_value` (masked):
        [past_len, ..., max_seq - 2]
    """
    shape = (batch_size, 1, max_seq)
    mask = torch.full(shape, mask_value, device=device, dtype=dtype)

    # A non-positive past length opens no past column at all.
    visible_past = max(past_len, 0)
    mask[..., :visible_past] = 0.0

    # The last column belongs to the current token and is always visible.
    mask[..., -1] = 0.0
    return mask
203
+
204
+
205
class StaticLlamaLayerRuntime:
    """
    Hybrid runtime that uses:
    - wrapped decoder layers for prefill and decode
    - original embedding / final norm / lm_head on CPU or a chosen device

    This runtime enforces static decode shapes:
        hidden_states:        (B, 1, D)
        attention_mask:       (B, 1, max_seq)
        past_key_value:       (B, n_kv, max_seq - 1, head_dim)
        position_embeddings:  (B, 1, head_dim)
    """

    def __init__(
        self,
        model: AutoModelForCausalLM,
        tokenizer: AutoTokenizer,
        max_seq: int,
        device: str = "cpu",
        prefill_layers: Optional[Sequence[nn.Module]] = None,
        decode_layers: Optional[Sequence[nn.Module]] = None,
    ):
        """
        Set up the runtime around a causal LM.

        Args:
            model: Reference model; it is switched to eval mode and moved to `device`.
            tokenizer: Tokenizer used by `generate_greedy` and
                `verify_against_reference`.
            max_seq: Static sequence length of the decode window.
            device: Device string passed to `torch.device`.
            prefill_layers: Optional pre-built prefill wrappers. When None,
                one wrapper per reference layer is created.
            decode_layers: Optional pre-built decode wrappers. When None,
                one wrapper per reference layer is created.

        Raises:
            AssertionError: If a layer has no `.wrapped` attribute or
                `.wrapped` is not of the wrapper type expected for its phase.
        """
        self.model = model.eval().to(device)
        self.tokenizer = tokenizer
        self.max_seq = max_seq
        self.device = torch.device(device)

        self.embed_tokens = self.model.model.embed_tokens
        self.final_norm = self.model.model.norm
        self.lm_head = self.model.lm_head
        self.layers_ref = self.model.model.layers

        if prefill_layers is None:
            self.prefill_layers = nn.ModuleList(
                [
                    _clone_layer_with_variant(layer, "prefill")
                    for layer in self.layers_ref
                ]
            ).to(self.device)
        else:
            self.prefill_layers = nn.ModuleList(prefill_layers).to(self.device)

        if decode_layers is None:
            self.decode_layers = nn.ModuleList(
                [
                    _clone_layer_with_variant(layer, "decode")
                    for layer in self.layers_ref
                ]
            ).to(self.device)
        else:
            self.decode_layers = nn.ModuleList(decode_layers).to(self.device)

        for layer in self.prefill_layers:
            assert hasattr(layer, "wrapped")
            assert isinstance(layer.wrapped, QuantLlamaDecoderLayerPrefill)

        for layer in self.decode_layers:
            assert hasattr(layer, "wrapped")
            assert isinstance(layer.wrapped, QuantLlamaDecoderLayerDecode)

        self.config = self.model.config
        self.hidden_size = self.config.hidden_size
        self.num_hidden_layers = self.config.num_hidden_layers
        self.num_kv_heads = self.config.num_key_value_heads
        self.head_dim = getattr(self.config, "head_dim", None) or (
            self.hidden_size // self.config.num_attention_heads
        )

        self.rope_cos, self.rope_sin = _build_rope_templates_from_config(
            self.config,
            max_seq=self.max_seq,
            device=self.device,
            dtype=torch.float32,
        )

        self.layer_caches: List[LayerCache] = []
        self.past_len = 0

    def reset_cache(self) -> None:
        """
        Reset all runtime KV caches.
        """
        self.layer_caches = []
        self.past_len = 0

    def _allocate_empty_cache(
        self, batch_size: int, dtype: torch.dtype
    ) -> List[LayerCache]:
        """
        Allocate external static KV buffers for all layers.

        The runtime stores only past tokens in these buffers, hence the
        `max_seq - 1` slots per layer. The current token is always produced
        as a delta by the decode wrapper.
        """
        caches = []
        for _ in range(self.num_hidden_layers):
            past_k = torch.zeros(
                batch_size,
                self.num_kv_heads,
                self.max_seq - 1,
                self.head_dim,
                device=self.device,
                dtype=dtype,
            )
            past_v = torch.zeros_like(past_k)
            caches.append(LayerCache(past_k=past_k, past_v=past_v))
        return caches

    @torch.no_grad()
    def prefill(self, input_ids: torch.LongTensor) -> torch.Tensor:
        """
        Run the prompt through all prefill layers and initialize static decode caches.

        Input:
            input_ids: (B, L)

        Returns:
            logits_last: (B, vocab_size)

        Raises:
            AssertionError: If `input_ids` is not 2-D, if L >= max_seq, or if
                a layer returns keys/values whose dim 2 differs from L.
        """
        assert (
            input_ids.dim() == 2
        ), f"Expected input_ids as (B, L), got {tuple(input_ids.shape)}"
        batch_size, prompt_len = input_ids.shape
        assert prompt_len < self.max_seq, (
            f"Prompt length must be < max_seq so that decode still has one current slot. "
            f"Got prompt_len={prompt_len}, max_seq={self.max_seq}"
        )

        hidden_states = self.embed_tokens(input_ids.to(self.device))
        runtime_dtype = hidden_states.dtype

        self.layer_caches = self._allocate_empty_cache(batch_size, runtime_dtype)

        for layer_idx, layer in enumerate(self.prefill_layers):
            out = layer(
                hidden_states=hidden_states,
                attention_mask=None,
                position_embeddings=None,
                use_cache=True,
            )

            hidden_states, present_key_value = out
            present_k, present_v = present_key_value

            assert present_k.size(2) == prompt_len
            assert present_v.size(2) == prompt_len

            # Copy the prompt keys/values into the front of the static buffers.
            self.layer_caches[layer_idx].past_k[:, :, :prompt_len, :] = present_k
            self.layer_caches[layer_idx].past_v[:, :, :prompt_len, :] = present_v

        self.past_len = prompt_len

        hidden_states = self.final_norm(hidden_states)
        logits = self.lm_head(hidden_states)
        logits_last = logits[:, -1, :]
        return logits_last

    @torch.no_grad()
    def decode_one(self, input_ids: torch.LongTensor) -> torch.Tensor:
        """
        Run one decode step with strict static input shapes.

        Input:
            input_ids: (B, 1)

        Returns:
            logits_last: (B, vocab_size)

        Raises:
            AssertionError: If `input_ids` is not (B, 1), if the caches were
                not initialized by `prefill`, or if `past_len` has reached
                `max_seq`.
        """
        assert (
            input_ids.dim() == 2 and input_ids.size(1) == 1
        ), f"Decode expects input_ids as (B, 1), got {tuple(input_ids.shape)}"
        assert (
            len(self.layer_caches) == self.num_hidden_layers
        ), "Caches are not initialized. Call prefill() first."
        assert (
            self.past_len < self.max_seq
        ), f"Decode position overflow: past_len={self.past_len}, max_seq={self.max_seq}"

        batch_size = input_ids.size(0)
        hidden_states = self.embed_tokens(input_ids.to(self.device))

        attention_mask = _build_decode_attention_mask(
            batch_size=batch_size,
            past_len=self.past_len,
            max_seq=self.max_seq,
            device=self.device,
            dtype=hidden_states.dtype,
        )

        position_embeddings = _slice_rope(
            self.rope_cos,
            self.rope_sin,
            position=self.past_len,
            batch_size=batch_size,
            device=self.device,
            dtype=hidden_states.dtype,
        )

        # The past buffers hold max_seq - 1 slots. At the last decodable
        # position (past_len == max_seq - 1) no slot is left, and no further
        # decode step can follow, so the new delta is not stored.
        write_pos = self.past_len
        has_free_slot = write_pos < self.max_seq - 1

        for layer_idx, layer in enumerate(self.decode_layers):
            cache = self.layer_caches[layer_idx]

            out = layer(
                hidden_states=hidden_states,
                attention_mask=attention_mask,
                past_key_value=(cache.past_k, cache.past_v),
                position_embeddings=position_embeddings,
                use_cache=True,
            )

            hidden_states, present_key_value = out
            new_k, new_v = present_key_value

            if has_free_slot:
                cache.past_k[:, :, write_pos : write_pos + 1, :] = new_k
                cache.past_v[:, :, write_pos : write_pos + 1, :] = new_v

        self.past_len += 1

        hidden_states = self.final_norm(hidden_states)
        logits = self.lm_head(hidden_states)
        logits_last = logits[:, -1, :]
        return logits_last

    @torch.no_grad()
    def generate_greedy(
        self,
        prompt: str,
        max_new_tokens: int,
        eos_token_id: Optional[int] = None,
    ) -> torch.LongTensor:
        """
        Greedy generation using prefill once and then decode-only static steps.

        Generation ends after `max_new_tokens` tokens, when every sequence in
        the batch emits `eos_token_id`, or when the static decode window is
        full, whichever comes first.

        Args:
            prompt: Text to tokenize and continue.
            max_new_tokens: Upper bound on the number of appended tokens.
            eos_token_id: Stop token; defaults to the tokenizer's EOS id.

        Returns:
            The prompt token ids followed by the generated token ids.
        """
        batch = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
        input_ids = batch["input_ids"].to(self.device)

        if eos_token_id is None:
            eos_token_id = self.tokenizer.eos_token_id

        self.reset_cache()
        logits = self.prefill(input_ids)

        generated = input_ids.clone()

        for _ in range(max_new_tokens):
            next_token = torch.argmax(logits, dim=-1, keepdim=True)
            generated = torch.cat([generated, next_token], dim=1)

            if eos_token_id is not None and torch.all(next_token == eos_token_id):
                break

            # Stop cleanly once every static position has been consumed;
            # another decode step would trip the overflow assertion.
            if self.past_len >= self.max_seq:
                break

            logits = self.decode_one(next_token)

        return generated

    @torch.no_grad()
    def verify_against_reference(
        self,
        prompt: str,
        steps: int = 8,
        verbose: bool = True,
    ) -> None:
        """
        Compare runtime logits step-by-step against the full reference model.

        This verifies runtime correctness, not export correctness.
        If the wrapped layers are still FP-like, the mismatch should be tiny.
        If they were converted to quantized mode, some quantization error is expected.

        Args:
            prompt: Text used to drive both the runtime and the reference.
            steps: Maximum number of decode steps to compare after prefill.
            verbose: When True, print the per-step difference statistics.
        """
        batch = self.tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
        input_ids = batch["input_ids"].to(self.device)

        self.reset_cache()

        logits_rt = self.prefill(input_ids)
        ref_out = self.model(input_ids=input_ids)
        logits_ref = ref_out.logits[:, -1, :]

        diff = (logits_rt - logits_ref).abs()
        mean_diff = diff.mean().item()
        max_diff = diff.max().item()

        if verbose:
            print("=" * 100)
            print("Step 0: prefill last-token logits")
            print(f"mean|diff| = {mean_diff:.8f}")
            print(f" max|diff| = {max_diff:.8f}")
            print(f"PEIR = {compute_peir(logits_rt, logits_ref) * 100:.6f} %")

        generated = input_ids.clone()
        next_token = torch.argmax(logits_rt, dim=-1, keepdim=True)
        generated = torch.cat([generated, next_token], dim=1)

        for step in range(1, steps + 1):
            logits_rt = self.decode_one(next_token)

            # The reference recomputes the whole sequence without a cache.
            ref_out = self.model(input_ids=generated)
            logits_ref = ref_out.logits[:, -1, :]

            diff = (logits_rt - logits_ref).abs()
            mean_diff = diff.mean().item()
            max_diff = diff.max().item()

            if verbose:
                print("-" * 100)
                print(f"Step {step}: decode logits")
                print(f"sequence length = {generated.size(1)}")
                print(f"mean|diff| = {mean_diff:.8f}")
                print(f" max|diff| = {max_diff:.8f}")
                print(f"PEIR = {compute_peir(logits_rt, logits_ref) * 100:.6f} %")

            # Both paths follow the runtime's own greedy choice.
            next_token = torch.argmax(logits_rt, dim=-1, keepdim=True)
            generated = torch.cat([generated, next_token], dim=1)

            if generated.size(1) >= self.max_seq:
                if verbose:
                    print("-" * 100)
                    print("Stopped because the static decode window is full.")
                break

        if verbose:
            print("=" * 100)
            print("Verification finished.")

    @torch.no_grad()
    def dump_decode_inputs(
        self,
        input_id: int,
    ) -> Tuple[torch.Tensor, torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """
        Prepare one-step decode inputs without running the layers.

        This is useful when debugging export/runtime parity.

        Args:
            input_id: Token id of the current decode step (batch size 1).

        Returns:
            `(hidden_states, attention_mask, position_embeddings)` built for
            the current `past_len`, where `position_embeddings` is the
            `(cos, sin)` pair.
        """
        x = torch.tensor([[input_id]], device=self.device, dtype=torch.long)
        hidden_states = self.embed_tokens(x)

        attention_mask = _build_decode_attention_mask(
            batch_size=1,
            past_len=self.past_len,
            max_seq=self.max_seq,
            device=self.device,
            dtype=hidden_states.dtype,
        )

        position_embeddings = _slice_rope(
            self.rope_cos,
            self.rope_sin,
            position=self.past_len,
            batch_size=1,
            device=self.device,
            dtype=hidden_states.dtype,
        )

        return hidden_states, attention_mask, position_embeddings
559
+
560
+
561
def main():
    """
    Script entry point.

    Loads the model and tokenizer named on the command line, builds the
    static runtime, prints the step-by-step comparison against the reference
    model, and finally prints a greedy continuation of the prompt.
    """
    args = parse_args()
    torch.set_grad_enabled(False)

    model = AutoModelForCausalLM.from_pretrained(args.model, dtype=torch.float32)
    model = model.to(args.device)
    tokenizer = AutoTokenizer.from_pretrained(args.model, legacy=False)

    # Fall back to the EOS token when the tokenizer defines no pad token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token

    model.config.max_position_embeddings = args.max_seq

    runtime = StaticLlamaLayerRuntime(
        model=model,
        tokenizer=tokenizer,
        max_seq=args.max_seq,
        device=args.device,
    )

    runtime.verify_against_reference(
        prompt=args.prompt, steps=args.verify_steps, verbose=True
    )

    out_ids = runtime.generate_greedy(
        prompt=args.prompt,
        max_new_tokens=args.gen_steps,
        eos_token_id=tokenizer.eos_token_id,
    )
    generated_text = tokenizer.decode(out_ids[0], skip_special_tokens=True)

    print("=" * 100)
    print("Generated text:")
    print(generated_text)
601
+
602
+
603
+ if __name__ == "__main__":
604
+ main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tico
3
- Version: 0.2.0.dev260326
3
+ Version: 0.2.0.dev260331
4
4
  Summary: Convert Exported Torch Module To Circle
5
5
  License: This file provides full text of licenses used in this project
6
6
 
@@ -120,6 +120,7 @@ tico/quantization/wrapq/examples/quantize_full_qmodel_with_gptq.py
120
120
  tico/quantization/wrapq/examples/quantize_full_vlm_model_with_gptq.py
121
121
  tico/quantization/wrapq/examples/quantize_qwen3_vl_with_gptq.py
122
122
  tico/quantization/wrapq/examples/quantize_with_gptq.py
123
+ tico/quantization/wrapq/examples/static_llama_layer_runtime.py
123
124
  tico/quantization/wrapq/examples/llama/__init__.py
124
125
  tico/quantization/wrapq/examples/llama/quantize_attn_decode.py
125
126
  tico/quantization/wrapq/examples/llama/quantize_attn_prefill.py
@@ -1 +0,0 @@
1
- __version__ = "0.2.0.dev260326"
File without changes
File without changes
File without changes