tico 0.1.0.dev251026__py3-none-any.whl → 0.1.0.dev251028__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tico might be problematic. Click here for more details.

Files changed (110)
  1. tico/__init__.py +1 -1
  2. tico/quantization/__init__.py +6 -0
  3. tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +1 -1
  4. tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +5 -5
  5. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
  6. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
  7. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
  8. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
  9. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
  10. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
  11. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
  12. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
  13. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
  14. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
  15. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
  16. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
  17. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
  18. tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
  19. tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
  20. tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
  21. tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
  22. tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
  23. tico/{experimental/quantization → quantization}/config/ptq.py +6 -6
  24. tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
  25. tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
  26. tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
  27. tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
  28. tico/{experimental/quantization → quantization}/public_interface.py +5 -5
  29. tico/{experimental/quantization → quantization}/quantizer.py +1 -1
  30. tico/{experimental/quantization → quantization}/quantizer_registry.py +4 -6
  31. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +3 -3
  32. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +4 -4
  33. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +6 -6
  34. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +6 -8
  35. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +6 -6
  36. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +8 -8
  37. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +8 -10
  38. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
  39. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
  40. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
  41. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
  42. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
  43. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
  44. tico/{experimental/quantization/ptq → quantization/wrapq}/quantizer.py +6 -8
  45. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
  46. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
  47. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
  48. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +4 -6
  49. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +5 -7
  50. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +4 -6
  51. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +5 -7
  52. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +4 -6
  53. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +4 -6
  54. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +6 -10
  55. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +4 -6
  56. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +4 -6
  57. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +5 -7
  58. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +3 -5
  59. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +3 -5
  60. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +3 -5
  61. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +3 -3
  62. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +12 -14
  63. tico/utils/convert.py +8 -14
  64. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/METADATA +1 -1
  65. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/RECORD +109 -109
  66. tico/experimental/quantization/__init__.py +0 -6
  67. /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
  68. /tico/{experimental/quantization → quantization}/algorithm/gptq/__init__.py +0 -0
  69. /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
  70. /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
  71. /tico/{experimental/quantization → quantization}/algorithm/pt2e/__init__.py +0 -0
  72. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/__init__.py +0 -0
  73. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
  74. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
  75. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/__init__.py +0 -0
  76. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
  77. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/__init__.py +0 -0
  78. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
  79. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
  80. /tico/{experimental/quantization → quantization}/config/__init__.py +0 -0
  81. /tico/{experimental/quantization → quantization}/config/base.py +0 -0
  82. /tico/{experimental/quantization → quantization}/evaluation/__init__.py +0 -0
  83. /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
  84. /tico/{experimental/quantization → quantization}/evaluation/executor/__init__.py +0 -0
  85. /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
  86. /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
  87. /tico/{experimental/quantization → quantization}/evaluation/utils.py +0 -0
  88. /tico/{experimental/quantization → quantization}/passes/__init__.py +0 -0
  89. /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
  90. /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  91. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
  92. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
  93. /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
  94. /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
  95. /tico/{experimental/quantization/ptq → quantization/wrapq}/__init__.py +0 -0
  96. /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
  97. /tico/{experimental/quantization/ptq → quantization/wrapq}/examples/__init__.py +0 -0
  98. /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
  99. /tico/{experimental/quantization/ptq → quantization/wrapq}/observers/__init__.py +0 -0
  100. /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
  101. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/__init__.py +0 -0
  102. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
  103. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/__init__.py +0 -0
  104. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
  105. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/__init__.py +0 -0
  106. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/__init__.py +0 -0
  107. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/LICENSE +0 -0
  108. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/WHEEL +0 -0
  109. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/entry_points.txt +0 -0
  110. {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/top_level.txt +0 -0
tico/__init__.py CHANGED
@@ -29,7 +29,7 @@ __all__ = [
29
29
  ]
30
30
 
31
31
  # THIS LINE IS AUTOMATICALLY GENERATED BY setup.py
32
- __version__ = "0.1.0.dev251026"
32
+ __version__ = "0.1.0.dev251028"
33
33
 
34
34
  MINIMUM_SUPPORTED_VERSION = "2.5.0"
35
35
  SECURE_TORCH_VERSION = "2.6.0"
@@ -0,0 +1,6 @@
1
+ from tico.quantization.public_interface import convert, prepare
2
+
3
+ __all__ = [
4
+ "convert",
5
+ "prepare",
6
+ ]
@@ -25,7 +25,7 @@ from typing import Optional
25
25
  import torch
26
26
  import torch.nn as nn
27
27
 
28
- from tico.experimental.quantization.algorithm.gptq.quant import quantize, Quantizer
28
+ from tico.quantization.algorithm.gptq.quant import quantize, Quantizer
29
29
 
30
30
  torch.backends.cuda.matmul.allow_tf32 = False
31
31
  torch.backends.cudnn.allow_tf32 = False
@@ -19,15 +19,15 @@ from typing import Any, Callable, Dict, List, Optional
19
19
  import torch
20
20
  from tqdm.auto import tqdm
21
21
 
22
- from tico.experimental.quantization.algorithm.gptq.gptq import GPTQ
23
- from tico.experimental.quantization.algorithm.gptq.utils import (
22
+ from tico.quantization.algorithm.gptq.gptq import GPTQ
23
+ from tico.quantization.algorithm.gptq.utils import (
24
24
  find_layers,
25
25
  gather_single_batch_from_dict,
26
26
  gather_single_batch_from_list,
27
27
  )
28
- from tico.experimental.quantization.config.gptq import GPTQConfig
29
- from tico.experimental.quantization.quantizer import BaseQuantizer
30
- from tico.experimental.quantization.quantizer_registry import register_quantizer
28
+ from tico.quantization.config.gptq import GPTQConfig
29
+ from tico.quantization.quantizer import BaseQuantizer
30
+ from tico.quantization.quantizer_registry import register_quantizer
31
31
 
32
32
 
33
33
  class StopForward(Exception):
@@ -25,14 +25,12 @@ from torch.ao.quantization.observer import MinMaxObserver, PerChannelMinMaxObser
25
25
  from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
26
26
  from torch.ao.quantization.quantizer.utils import _get_module_name_filter
27
27
 
28
- from tico.experimental.quantization.algorithm.pt2e.annotation.op import *
29
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
30
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
31
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
32
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
33
- QuantizationConfig,
34
- )
35
- from tico.experimental.quantization.algorithm.pt2e.transformation.convert_scalars_to_attrs import (
28
+ from tico.quantization.algorithm.pt2e.annotation.op import *
29
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
30
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
31
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
32
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
33
+ from tico.quantization.algorithm.pt2e.transformation.convert_scalars_to_attrs import (
36
34
  convert_scalars_to_attrs,
37
35
  )
38
36
 
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
19
19
  import torch
20
20
  from torch.ao.quantization.quantizer import SharedQuantizationSpec
21
21
 
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
25
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
26
- QuantizationConfig,
27
- )
22
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
25
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
28
26
  from tico.utils.validate_args_kwargs import AdaptiveAvgPool2dArgs
29
27
 
30
28
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import AddTensorArgs
28
26
 
29
27
 
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
19
19
  import torch
20
20
  from torch.ao.quantization.quantizer import DerivedQuantizationSpec
21
21
 
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
25
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
26
- QuantizationConfig,
27
- )
22
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
25
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
28
26
  from tico.utils.validate_args_kwargs import Conv2DArgs
29
27
 
30
28
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import DivTensorArgs
28
26
 
29
27
 
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
19
19
  import torch
20
20
  from torch.ao.quantization.quantizer import DerivedQuantizationSpec
21
21
 
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
25
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
26
- QuantizationConfig,
27
- )
22
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
23
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
24
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
25
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
28
26
  from tico.utils.validate_args_kwargs import LinearArgs
29
27
 
30
28
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import MeanDimArgs
28
26
 
29
27
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import MulTensorArgs
28
26
 
29
27
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import Relu6Args
28
26
 
29
27
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import RsqrtArgs
28
26
 
29
27
 
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
- import tico.experimental.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
- import tico.experimental.quantization.algorithm.pt2e.utils as quant_utils
24
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
25
- QuantizationConfig,
26
- )
21
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
22
+ import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
23
+ import tico.quantization.algorithm.pt2e.utils as quant_utils
24
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
27
25
  from tico.utils.validate_args_kwargs import SubTensorArgs
28
26
 
29
27
 
@@ -18,9 +18,7 @@ if TYPE_CHECKING:
18
18
  import torch.fx
19
19
  import torch
20
20
 
21
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
22
- QuantizationConfig,
23
- )
21
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
24
22
 
25
23
  AnnotatorType = Callable[
26
24
  [
@@ -22,7 +22,7 @@ from torch.ao.quantization.quantizer import (
22
22
  SharedQuantizationSpec,
23
23
  )
24
24
 
25
- import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
25
+ import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
26
26
 
27
27
 
28
28
  def annotate_input_qspec_map(node: torch.fx.Node, input_node: torch.fx.Node, qspec):
@@ -18,13 +18,13 @@ import torch
18
18
 
19
19
  from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
20
20
 
21
- from tico.experimental.quantization.algorithm.pt2e.annotation.annotator import (
21
+ from tico.quantization.algorithm.pt2e.annotation.annotator import (
22
22
  get_asymmetric_quantization_config,
23
23
  PT2EAnnotator,
24
24
  )
25
- from tico.experimental.quantization.config.pt2e import PT2EConfig
26
- from tico.experimental.quantization.quantizer import BaseQuantizer
27
- from tico.experimental.quantization.quantizer_registry import register_quantizer
25
+ from tico.quantization.config.pt2e import PT2EConfig
26
+ from tico.quantization.quantizer import BaseQuantizer
27
+ from tico.quantization.quantizer_registry import register_quantizer
28
28
 
29
29
 
30
30
  @register_quantizer(PT2EConfig)
@@ -20,9 +20,7 @@ import torch
20
20
  from torch.ao.quantization.quantizer import QuantizationSpec
21
21
  from torch.ao.quantization.quantizer.utils import _get_module_name_filter
22
22
 
23
- from tico.experimental.quantization.algorithm.pt2e.annotation.config import (
24
- QuantizationConfig,
25
- )
23
+ from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
26
24
 
27
25
 
28
26
  def get_module_type_filter(tp: Callable):
@@ -16,16 +16,12 @@ from typing import Any, Dict, Optional
16
16
 
17
17
  import torch
18
18
 
19
- from tico.experimental.quantization.algorithm.smoothquant.observer import (
20
- ChannelwiseMaxActsObserver,
21
- )
22
-
23
- from tico.experimental.quantization.algorithm.smoothquant.smooth_quant import (
24
- apply_smoothing,
25
- )
26
- from tico.experimental.quantization.config.smoothquant import SmoothQuantConfig
27
- from tico.experimental.quantization.quantizer import BaseQuantizer
28
- from tico.experimental.quantization.quantizer_registry import register_quantizer
19
+ from tico.quantization.algorithm.smoothquant.observer import ChannelwiseMaxActsObserver
20
+
21
+ from tico.quantization.algorithm.smoothquant.smooth_quant import apply_smoothing
22
+ from tico.quantization.config.smoothquant import SmoothQuantConfig
23
+ from tico.quantization.quantizer import BaseQuantizer
24
+ from tico.quantization.quantizer_registry import register_quantizer
29
25
 
30
26
 
31
27
  @register_quantizer(SmoothQuantConfig)
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from tico.experimental.quantization.config.base import BaseConfig
15
+ from tico.quantization.config.base import BaseConfig
16
16
 
17
17
 
18
18
  class GPTQConfig(BaseConfig):
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from tico.experimental.quantization.config.base import BaseConfig
15
+ from tico.quantization.config.base import BaseConfig
16
16
 
17
17
 
18
18
  class PT2EConfig(BaseConfig):
@@ -15,11 +15,11 @@
15
15
  from dataclasses import dataclass, field
16
16
  from typing import Any, Dict, Mapping, Type
17
17
 
18
- from tico.experimental.quantization.config.base import BaseConfig
19
- from tico.experimental.quantization.ptq.dtypes import DType
20
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
21
- from tico.experimental.quantization.ptq.observers.minmax import MinMaxObserver
22
- from tico.experimental.quantization.ptq.qscheme import QScheme
18
+ from tico.quantization.config.base import BaseConfig
19
+ from tico.quantization.wrapq.dtypes import DType
20
+ from tico.quantization.wrapq.observers.base import ObserverBase
21
+ from tico.quantization.wrapq.observers.minmax import MinMaxObserver
22
+ from tico.quantization.wrapq.qscheme import QScheme
23
23
 
24
24
 
25
25
  @dataclass
@@ -55,7 +55,7 @@ class PTQConfig(BaseConfig):
55
55
  Example
56
56
  -------
57
57
  ```python
58
- from ptq.observers import PercentileObserver
58
+ from wrapq.observers import PercentileObserver
59
59
 
60
60
  cfg = PTQConfig(
61
61
  default_dtype = DType.uint(8),
@@ -14,7 +14,7 @@
14
14
 
15
15
  from typing import Dict, Literal, Optional
16
16
 
17
- from tico.experimental.quantization.config.base import BaseConfig
17
+ from tico.quantization.config.base import BaseConfig
18
18
 
19
19
 
20
20
  class SmoothQuantConfig(BaseConfig):
@@ -20,18 +20,12 @@ import torch
20
20
  from circle_schema import circle
21
21
  from torch.utils import _pytree as pytree
22
22
 
23
- from tico.experimental.quantization.evaluation.backend import BACKEND
24
- from tico.experimental.quantization.evaluation.executor.backend_executor import (
25
- BackendExecutor,
26
- )
27
- from tico.experimental.quantization.evaluation.executor.circle_executor import (
28
- CircleExecutor,
29
- )
30
- from tico.experimental.quantization.evaluation.executor.triv24_executor import (
31
- Triv24Executor,
32
- )
33
- from tico.experimental.quantization.evaluation.metric import MetricCalculator
34
- from tico.experimental.quantization.evaluation.utils import (
23
+ from tico.quantization.evaluation.backend import BACKEND
24
+ from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
25
+ from tico.quantization.evaluation.executor.circle_executor import CircleExecutor
26
+ from tico.quantization.evaluation.executor.triv24_executor import Triv24Executor
27
+ from tico.quantization.evaluation.metric import MetricCalculator
28
+ from tico.quantization.evaluation.utils import (
35
29
  ensure_list,
36
30
  find_invalid_types,
37
31
  get_graph_input_output,
@@ -19,9 +19,7 @@ from typing import List
19
19
  import numpy as np
20
20
  import torch
21
21
 
22
- from tico.experimental.quantization.evaluation.executor.backend_executor import (
23
- BackendExecutor,
24
- )
22
+ from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
25
23
  from tico.utils.model import CircleModel
26
24
  from tico.utils.utils import run_bash_cmd
27
25
 
@@ -20,10 +20,8 @@ import numpy as np
20
20
  import torch
21
21
  from circle_schema import circle
22
22
 
23
- from tico.experimental.quantization.evaluation.executor.backend_executor import (
24
- BackendExecutor,
25
- )
26
- from tico.experimental.quantization.evaluation.utils import (
23
+ from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
24
+ from tico.quantization.evaluation.utils import (
27
25
  dequantize,
28
26
  get_graph_input_output,
29
27
  quantize,
@@ -17,11 +17,11 @@ from typing import Any, Dict, Optional
17
17
 
18
18
  import torch
19
19
 
20
- from tico.experimental.quantization.algorithm.gptq.quantizer import GPTQQuantizer
21
- from tico.experimental.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
22
- from tico.experimental.quantization.config.base import BaseConfig
23
- from tico.experimental.quantization.quantizer import BaseQuantizer
24
- from tico.experimental.quantization.quantizer_registry import get_quantizer
20
+ from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
21
+ from tico.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
22
+ from tico.quantization.config.base import BaseConfig
23
+ from tico.quantization.quantizer import BaseQuantizer
24
+ from tico.quantization.quantizer_registry import get_quantizer
25
25
 
26
26
 
27
27
  QUANTIZER_ATTRIBUTE_NAME = "tico_quantizer"
@@ -17,7 +17,7 @@ from typing import Any, Dict, Optional
17
17
 
18
18
  import torch
19
19
 
20
- from tico.experimental.quantization.config.base import BaseConfig
20
+ from tico.quantization.config.base import BaseConfig
21
21
 
22
22
 
23
23
  class BaseQuantizer(ABC):
@@ -15,8 +15,8 @@
15
15
  import importlib
16
16
  from typing import Dict, Optional, Type, TypeVar
17
17
 
18
- from tico.experimental.quantization.config.base import BaseConfig
19
- from tico.experimental.quantization.quantizer import BaseQuantizer
18
+ from tico.quantization.config.base import BaseConfig
19
+ from tico.quantization.quantizer import BaseQuantizer
20
20
 
21
21
  TQ = TypeVar("TQ", bound=BaseQuantizer)
22
22
 
@@ -54,12 +54,10 @@ def get_quantizer(cfg: BaseConfig) -> BaseQuantizer:
54
54
  name = getattr(cfg, "name", None)
55
55
  if name:
56
56
  if name == "ptq":
57
- importlib.import_module(f"tico.experimental.quantization.ptq.quantizer")
57
+ importlib.import_module(f"tico.quantization.wrapq.quantizer")
58
58
  else:
59
59
  try:
60
- importlib.import_module(
61
- f"tico.experimental.quantization.algorithm.{name}.quantizer"
62
- )
60
+ importlib.import_module(f"tico.quantization.algorithm.{name}.quantizer")
63
61
  except Exception as e:
64
62
  raise RuntimeError(
65
63
  f"Failed to import quantizer module for config name='{name}': {e}"
@@ -28,9 +28,9 @@ import tqdm
28
28
  from datasets import load_dataset
29
29
  from transformers import AutoModelForCausalLM, AutoTokenizer
30
30
 
31
- from tico.experimental.quantization import convert, prepare
32
- from tico.experimental.quantization.config.ptq import PTQConfig
33
- from tico.experimental.quantization.ptq.utils.metrics import perplexity
31
+ from tico.quantization import convert, prepare
32
+ from tico.quantization.config.ptq import PTQConfig
33
+ from tico.quantization.wrapq.utils.metrics import perplexity
34
34
 
35
35
  # Token-budget presets for activation calibration
36
36
  TOKENS: dict[str, int] = {
@@ -38,14 +38,14 @@ import tqdm
38
38
  from datasets import load_dataset
39
39
  from transformers import AutoModelForCausalLM, AutoTokenizer
40
40
 
41
- from tico.experimental.quantization import convert, prepare
42
- from tico.experimental.quantization.config.ptq import PTQConfig
43
- from tico.experimental.quantization.ptq.utils.introspection import (
41
+ from tico.quantization import convert, prepare
42
+ from tico.quantization.config.ptq import PTQConfig
43
+ from tico.quantization.wrapq.utils.introspection import (
44
44
  build_fqn_map,
45
45
  compare_layer_outputs,
46
46
  save_fp_outputs,
47
47
  )
48
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
48
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
49
49
 
50
50
  # Token-budget presets for activation calibration
51
51
  TOKENS: dict[str, int] = {
@@ -29,12 +29,12 @@ import pathlib
29
29
  import torch
30
30
  import torch.nn as nn
31
31
 
32
- from tico.experimental.quantization import convert, prepare
33
- from tico.experimental.quantization.config.ptq import PTQConfig
34
- from tico.experimental.quantization.evaluation.metric import compute_peir
35
- from tico.experimental.quantization.evaluation.utils import plot_two_outputs
36
- from tico.experimental.quantization.ptq.mode import Mode
37
- from tico.experimental.quantization.ptq.wrappers.nn.quant_linear import QuantLinear
32
+ from tico.quantization import convert, prepare
33
+ from tico.quantization.config.ptq import PTQConfig
34
+ from tico.quantization.evaluation.metric import compute_peir
35
+ from tico.quantization.evaluation.utils import plot_two_outputs
36
+ from tico.quantization.wrapq.mode import Mode
37
+ from tico.quantization.wrapq.wrappers.nn.quant_linear import QuantLinear
38
38
  from tico.utils.utils import SuppressWarning
39
39
 
40
40
 
@@ -17,14 +17,12 @@ import pathlib
17
17
  import torch
18
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
19
 
20
- from tico.experimental.quantization import convert, prepare
21
- from tico.experimental.quantization.config.ptq import PTQConfig
22
- from tico.experimental.quantization.evaluation.metric import compute_peir
23
- from tico.experimental.quantization.evaluation.utils import plot_two_outputs
24
- from tico.experimental.quantization.ptq.mode import Mode
25
- from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
26
- QuantLlamaAttention,
27
- )
20
+ from tico.quantization import convert, prepare
21
+ from tico.quantization.config.ptq import PTQConfig
22
+ from tico.quantization.evaluation.metric import compute_peir
23
+ from tico.quantization.evaluation.utils import plot_two_outputs
24
+ from tico.quantization.wrapq.mode import Mode
25
+ from tico.quantization.wrapq.wrappers.llama.quant_attn import QuantLlamaAttention
28
26
  from tico.utils.utils import SuppressWarning
29
27
 
30
28
  name = "Maykeye/TinyLLama-v0"
@@ -31,12 +31,12 @@ import pathlib
31
31
  import torch
32
32
  from transformers import AutoModelForCausalLM, AutoTokenizer
33
33
 
34
- from tico.experimental.quantization import convert, prepare
35
- from tico.experimental.quantization.config.ptq import PTQConfig
36
- from tico.experimental.quantization.evaluation.metric import compute_peir
37
- from tico.experimental.quantization.evaluation.utils import plot_two_outputs
38
- from tico.experimental.quantization.ptq.mode import Mode
39
- from tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer import (
34
+ from tico.quantization import convert, prepare
35
+ from tico.quantization.config.ptq import PTQConfig
36
+ from tico.quantization.evaluation.metric import compute_peir
37
+ from tico.quantization.evaluation.utils import plot_two_outputs
38
+ from tico.quantization.wrapq.mode import Mode
39
+ from tico.quantization.wrapq.wrappers.llama.quant_decoder_layer import (
40
40
  QuantLlamaDecoderLayer,
41
41
  )
42
42
  from tico.utils.utils import SuppressWarning
@@ -18,14 +18,14 @@ import torch
18
18
  from transformers import AutoModelForCausalLM, AutoTokenizer
19
19
 
20
20
  import tico
21
- from tico.experimental.quantization import convert, prepare
22
- from tico.experimental.quantization.config.ptq import PTQConfig
23
- from tico.experimental.quantization.evaluation.metric import compute_peir
24
- from tico.experimental.quantization.evaluation.utils import plot_two_outputs
25
- from tico.experimental.quantization.ptq.dtypes import INT16
26
- from tico.experimental.quantization.ptq.mode import Mode
27
- from tico.experimental.quantization.ptq.qscheme import QScheme
28
- from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
21
+ from tico.quantization import convert, prepare
22
+ from tico.quantization.config.ptq import PTQConfig
23
+ from tico.quantization.evaluation.metric import compute_peir
24
+ from tico.quantization.evaluation.utils import plot_two_outputs
25
+ from tico.quantization.wrapq.dtypes import INT16
26
+ from tico.quantization.wrapq.mode import Mode
27
+ from tico.quantization.wrapq.qscheme import QScheme
28
+ from tico.quantization.wrapq.wrappers.llama.quant_mlp import QuantLlamaMLP
29
29
  from tico.utils.utils import SuppressWarning
30
30
 
31
31
  name = "Maykeye/TinyLLama-v0"