tico 0.1.0.dev251023__py3-none-any.whl → 0.1.0.dev251027__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tico might be problematic. Click here for more details.

Files changed (110)
  1. tico/__init__.py +1 -1
  2. tico/quantization/__init__.py +6 -0
  3. tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +1 -1
  4. tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +5 -5
  5. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
  6. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
  7. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
  8. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
  9. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
  10. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
  11. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
  12. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
  13. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
  14. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
  15. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
  16. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
  17. tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
  18. tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
  19. tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
  20. tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
  21. tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
  22. tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
  23. tico/{experimental/quantization → quantization}/config/ptq.py +6 -6
  24. tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
  25. tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
  26. tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
  27. tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
  28. tico/{experimental/quantization → quantization}/public_interface.py +5 -5
  29. tico/{experimental/quantization → quantization}/quantizer.py +1 -1
  30. tico/{experimental/quantization → quantization}/quantizer_registry.py +4 -6
  31. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +3 -3
  32. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +4 -4
  33. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +6 -6
  34. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +6 -8
  35. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +6 -6
  36. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +8 -8
  37. tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +8 -10
  38. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
  39. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
  40. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
  41. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
  42. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
  43. tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
  44. tico/{experimental/quantization/ptq → quantization/wrapq}/quantizer.py +6 -8
  45. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
  46. tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
  47. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
  48. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +4 -6
  49. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +5 -7
  50. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +4 -6
  51. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +5 -7
  52. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +4 -6
  53. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +4 -6
  54. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +6 -10
  55. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +4 -6
  56. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +4 -6
  57. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +5 -7
  58. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +3 -5
  59. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +3 -5
  60. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +3 -5
  61. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +3 -3
  62. tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +12 -14
  63. tico/utils/convert.py +8 -14
  64. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/METADATA +1 -1
  65. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/RECORD +109 -109
  66. tico/experimental/quantization/__init__.py +0 -6
  67. /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
  68. /tico/{experimental/quantization → quantization}/algorithm/gptq/__init__.py +0 -0
  69. /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
  70. /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
  71. /tico/{experimental/quantization → quantization}/algorithm/pt2e/__init__.py +0 -0
  72. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/__init__.py +0 -0
  73. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
  74. /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
  75. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/__init__.py +0 -0
  76. /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
  77. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/__init__.py +0 -0
  78. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
  79. /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
  80. /tico/{experimental/quantization → quantization}/config/__init__.py +0 -0
  81. /tico/{experimental/quantization → quantization}/config/base.py +0 -0
  82. /tico/{experimental/quantization → quantization}/evaluation/__init__.py +0 -0
  83. /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
  84. /tico/{experimental/quantization → quantization}/evaluation/executor/__init__.py +0 -0
  85. /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
  86. /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
  87. /tico/{experimental/quantization → quantization}/evaluation/utils.py +0 -0
  88. /tico/{experimental/quantization → quantization}/passes/__init__.py +0 -0
  89. /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
  90. /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
  91. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
  92. /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
  93. /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
  94. /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
  95. /tico/{experimental/quantization/ptq → quantization/wrapq}/__init__.py +0 -0
  96. /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
  97. /tico/{experimental/quantization/ptq → quantization/wrapq}/examples/__init__.py +0 -0
  98. /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
  99. /tico/{experimental/quantization/ptq → quantization/wrapq}/observers/__init__.py +0 -0
  100. /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
  101. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/__init__.py +0 -0
  102. /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
  103. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/__init__.py +0 -0
  104. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
  105. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/__init__.py +0 -0
  106. /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/__init__.py +0 -0
  107. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/LICENSE +0 -0
  108. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/WHEEL +0 -0
  109. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/entry_points.txt +0 -0
  110. {tico-0.1.0.dev251023.dist-info → tico-0.1.0.dev251027.dist-info}/top_level.txt +0 -0
@@ -33,16 +33,14 @@ import tqdm
33
33
  from datasets import load_dataset
34
34
  from transformers import AutoModelForCausalLM, AutoTokenizer
35
35
 
36
- from tico.experimental.quantization import convert, prepare
37
- from tico.experimental.quantization.config.gptq import GPTQConfig
38
- from tico.experimental.quantization.config.ptq import PTQConfig
39
- from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
40
- from tico.experimental.quantization.ptq.utils.introspection import build_fqn_map
41
- from tico.experimental.quantization.ptq.utils.metrics import perplexity
42
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
43
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
44
- QuantModuleBase,
45
- )
36
+ from tico.quantization import convert, prepare
37
+ from tico.quantization.config.gptq import GPTQConfig
38
+ from tico.quantization.config.ptq import PTQConfig
39
+ from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
40
+ from tico.quantization.wrapq.utils.introspection import build_fqn_map
41
+ from tico.quantization.wrapq.utils.metrics import perplexity
42
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
43
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
46
44
 
47
45
 
48
46
  # Token-budget presets for activation calibration
@@ -17,9 +17,9 @@ from typing import Optional, Tuple
17
17
 
18
18
  import torch
19
19
 
20
- from tico.experimental.quantization.ptq.dtypes import DType, UINT8
21
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
22
- from tico.experimental.quantization.ptq.qscheme import QScheme
20
+ from tico.quantization.wrapq.dtypes import DType, UINT8
21
+ from tico.quantization.wrapq.observers.base import ObserverBase
22
+ from tico.quantization.wrapq.qscheme import QScheme
23
23
 
24
24
 
25
25
  class AffineObserverBase(ObserverBase):
@@ -17,8 +17,8 @@ from typing import Optional, Tuple
17
17
 
18
18
  import torch
19
19
 
20
- from tico.experimental.quantization.ptq.dtypes import DType, UINT8
21
- from tico.experimental.quantization.ptq.qscheme import QScheme
20
+ from tico.quantization.wrapq.dtypes import DType, UINT8
21
+ from tico.quantization.wrapq.qscheme import QScheme
22
22
 
23
23
 
24
24
  class ObserverBase(ABC):
@@ -14,8 +14,8 @@
14
14
 
15
15
  import torch
16
16
 
17
- from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
18
- from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
17
+ from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
18
+ from tico.quantization.wrapq.utils.reduce_utils import channelwise_minmax
19
19
 
20
20
 
21
21
  class EMAObserver(AffineObserverBase):
@@ -24,7 +24,7 @@ performing any statistics gathering or fake-quantization.
24
24
  """
25
25
  import torch
26
26
 
27
- from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
27
+ from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
28
28
 
29
29
 
30
30
  class IdentityObserver(AffineObserverBase):
@@ -14,8 +14,8 @@
14
14
 
15
15
  import torch
16
16
 
17
- from tico.experimental.quantization.ptq.observers.affine_base import AffineObserverBase
18
- from tico.experimental.quantization.ptq.utils.reduce_utils import channelwise_minmax
17
+ from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
18
+ from tico.quantization.wrapq.utils.reduce_utils import channelwise_minmax
19
19
 
20
20
 
21
21
  class MinMaxObserver(AffineObserverBase):
@@ -14,7 +14,7 @@
14
14
 
15
15
  import torch
16
16
 
17
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
17
+ from tico.quantization.wrapq.observers.base import ObserverBase
18
18
  from tico.utils.mx.mx_ops import quantize_mx
19
19
 
20
20
 
@@ -17,14 +17,12 @@ from typing import Any, Dict, Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
-
22
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
23
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
24
- QuantModuleBase,
25
- )
26
- from tico.experimental.quantization.quantizer import BaseQuantizer
27
- from tico.experimental.quantization.quantizer_registry import register_quantizer
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.quantizer import BaseQuantizer
22
+ from tico.quantization.quantizer_registry import register_quantizer
23
+
24
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
25
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
28
26
 
29
27
 
30
28
  @register_quantizer(PTQConfig)
@@ -16,11 +16,9 @@ from typing import Callable, Dict, List, Optional, Tuple
16
16
 
17
17
  import torch
18
18
 
19
- from tico.experimental.quantization.evaluation.metric import MetricCalculator
20
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
21
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
- QuantModuleBase,
23
- )
19
+ from tico.quantization.evaluation.metric import MetricCalculator
20
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
21
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
24
22
 
25
23
 
26
24
  def build_fqn_map(root: torch.nn.Module) -> dict[torch.nn.Module, str]:
@@ -98,7 +98,8 @@ def perplexity(
98
98
 
99
99
  input_ids = input_ids_full[:, begin:end]
100
100
  target_ids = input_ids.clone()
101
- target_ids[:, :-trg_len] = ignore_index # mask previously-scored tokens
101
+ # mask previously-scored tokens
102
+ target_ids[:, :-trg_len] = ignore_index # type: ignore[assignment]
102
103
 
103
104
  with torch.no_grad():
104
105
  outputs = model(input_ids, labels=target_ids)
@@ -106,7 +107,7 @@ def perplexity(
106
107
  neg_log_likelihood = outputs.loss
107
108
 
108
109
  # exact number of labels that contributed to loss
109
- loss_tokens = (target_ids[:, 1:] != ignore_index).sum().item()
110
+ loss_tokens = (target_ids[:, 1:] != ignore_index).sum().item() # type: ignore[attr-defined]
110
111
  nll_sum += neg_log_likelihood * loss_tokens
111
112
  n_tokens += int(loss_tokens)
112
113
 
@@ -1,4 +1,4 @@
1
- from tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha import (
1
+ from tico.quantization.wrapq.wrappers.fairseq.quant_mha import (
2
2
  QuantFairseqMultiheadAttention,
3
3
  )
4
4
 
@@ -25,12 +25,10 @@ import torch
25
25
  import torch.nn.functional as F
26
26
  from torch import nn, Tensor
27
27
 
28
- from tico.experimental.quantization.config.ptq import PTQConfig
29
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
30
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
31
- QuantModuleBase,
32
- )
33
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
28
+ from tico.quantization.config.ptq import PTQConfig
29
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
30
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
31
+ from tico.quantization.wrapq.wrappers.registry import try_register
34
32
 
35
33
 
36
34
  @try_register("fairseq.models.transformer.TransformerDecoderBase")
@@ -23,15 +23,13 @@ from typing import Dict, Iterable, List, Optional, Tuple
23
23
  import torch
24
24
  from torch import nn, Tensor
25
25
 
26
- from tico.experimental.quantization.config.ptq import PTQConfig
27
- from tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha import (
26
+ from tico.quantization.config.ptq import PTQConfig
27
+ from tico.quantization.wrapq.wrappers.fairseq.quant_mha import (
28
28
  QuantFairseqMultiheadAttention,
29
29
  )
30
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
31
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
32
- QuantModuleBase,
33
- )
34
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
30
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
31
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
32
+ from tico.quantization.wrapq.wrappers.registry import try_register
35
33
 
36
34
 
37
35
  @try_register("fairseq.modules.transformer_layer.TransformerDecoderLayerBase")
@@ -25,12 +25,10 @@ import torch
25
25
  import torch.nn as nn
26
26
  from torch import Tensor
27
27
 
28
- from tico.experimental.quantization.config.ptq import PTQConfig
29
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
30
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
31
- QuantModuleBase,
32
- )
33
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
28
+ from tico.quantization.config.ptq import PTQConfig
29
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
30
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
31
+ from tico.quantization.wrapq.wrappers.registry import try_register
34
32
 
35
33
 
36
34
  @try_register("fairseq.models.transformer.TransformerEncoderBase")
@@ -23,15 +23,13 @@ from typing import Optional
23
23
  import torch.nn as nn
24
24
  from torch import Tensor
25
25
 
26
- from tico.experimental.quantization.config.ptq import PTQConfig
27
- from tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha import (
26
+ from tico.quantization.config.ptq import PTQConfig
27
+ from tico.quantization.wrapq.wrappers.fairseq.quant_mha import (
28
28
  QuantFairseqMultiheadAttention,
29
29
  )
30
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
31
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
32
- QuantModuleBase,
33
- )
34
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
30
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
31
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
32
+ from tico.quantization.wrapq.wrappers.registry import try_register
35
33
 
36
34
 
37
35
  @try_register("fairseq.modules.transformer_layer.TransformerEncoderLayerBase")
@@ -24,12 +24,10 @@ import torch
24
24
  import torch.nn as nn
25
25
  import torch.nn.functional as F
26
26
 
27
- from tico.experimental.quantization.config.ptq import PTQConfig
28
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
29
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
30
- QuantModuleBase,
31
- )
32
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
27
+ from tico.quantization.config.ptq import PTQConfig
28
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
29
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
30
+ from tico.quantization.wrapq.wrappers.registry import try_register
33
31
 
34
32
 
35
33
  @try_register("fairseq.modules.multihead_attention.MultiheadAttention")
@@ -17,12 +17,10 @@ from typing import Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
22
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
- QuantModuleBase,
24
- )
25
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
22
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
23
+ from tico.quantization.wrapq.wrappers.registry import try_register
26
24
 
27
25
 
28
26
  @try_register(
@@ -17,16 +17,12 @@ from typing import Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
- from tico.experimental.quantization.ptq.wrappers.llama.quant_attn import (
22
- QuantLlamaAttention,
23
- )
24
- from tico.experimental.quantization.ptq.wrappers.llama.quant_mlp import QuantLlamaMLP
25
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
26
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
27
- QuantModuleBase,
28
- )
29
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.llama.quant_attn import QuantLlamaAttention
22
+ from tico.quantization.wrapq.wrappers.llama.quant_mlp import QuantLlamaMLP
23
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
24
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
25
+ from tico.quantization.wrapq.wrappers.registry import try_register
30
26
 
31
27
 
32
28
  @try_register("transformers.models.llama.modeling_llama.LlamaDecoderLayer")
@@ -17,12 +17,10 @@ from typing import Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
- from tico.experimental.quantization.ptq.wrappers.ptq_wrapper import PTQWrapper
22
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
23
- QuantModuleBase,
24
- )
25
- from tico.experimental.quantization.ptq.wrappers.registry import try_register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
22
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
23
+ from tico.quantization.wrapq.wrappers.registry import try_register
26
24
 
27
25
 
28
26
  @try_register("transformers.models.llama.modeling_llama.LlamaMLP")
@@ -17,13 +17,11 @@ from typing import Iterable, Optional, Tuple
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
20
+ from tico.quantization.config.ptq import PTQConfig
21
21
 
22
- from tico.experimental.quantization.ptq.mode import Mode
23
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
24
- QuantModuleBase,
25
- )
26
- from tico.experimental.quantization.ptq.wrappers.registry import register
22
+ from tico.quantization.wrapq.mode import Mode
23
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
24
+ from tico.quantization.wrapq.wrappers.registry import register
27
25
 
28
26
 
29
27
  @register(nn.LayerNorm)
@@ -17,14 +17,12 @@ from typing import Optional
17
17
  import torch.nn as nn
18
18
  import torch.nn.functional as F
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
20
+ from tico.quantization.config.ptq import PTQConfig
21
21
 
22
- from tico.experimental.quantization.ptq.mode import Mode
23
- from tico.experimental.quantization.ptq.qscheme import QScheme
24
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
25
- QuantModuleBase,
26
- )
27
- from tico.experimental.quantization.ptq.wrappers.registry import register
22
+ from tico.quantization.wrapq.mode import Mode
23
+ from tico.quantization.wrapq.qscheme import QScheme
24
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
25
+ from tico.quantization.wrapq.wrappers.registry import register
28
26
 
29
27
 
30
28
  @register(nn.Linear)
@@ -17,11 +17,9 @@ from typing import Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
- QuantModuleBase,
23
- )
24
- from tico.experimental.quantization.ptq.wrappers.registry import register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
22
+ from tico.quantization.wrapq.wrappers.registry import register
25
23
 
26
24
 
27
25
  @register(nn.SiLU)
@@ -16,11 +16,9 @@ from typing import Optional
16
16
 
17
17
  import torch
18
18
 
19
- from tico.experimental.quantization.config.ptq import PTQConfig
20
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
21
- QuantModuleBase,
22
- )
23
- from tico.experimental.quantization.ptq.wrappers.registry import lookup
19
+ from tico.quantization.config.ptq import PTQConfig
20
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
21
+ from tico.quantization.wrapq.wrappers.registry import lookup
24
22
 
25
23
 
26
24
  class PTQWrapper(QuantModuleBase):
@@ -17,11 +17,9 @@ from typing import Callable, Optional
17
17
  import torch
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
21
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
22
- QuantModuleBase,
23
- )
24
- from tico.experimental.quantization.ptq.wrappers.registry import register
20
+ from tico.quantization.config.ptq import PTQConfig
21
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
22
+ from tico.quantization.wrapq.wrappers.registry import register
25
23
 
26
24
 
27
25
  class QuantElementwise(QuantModuleBase):
@@ -17,10 +17,10 @@ from typing import Iterable, Optional, Tuple
17
17
 
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.config.ptq import PTQConfig
20
+ from tico.quantization.config.ptq import PTQConfig
21
21
 
22
- from tico.experimental.quantization.ptq.mode import Mode
23
- from tico.experimental.quantization.ptq.observers.base import ObserverBase
22
+ from tico.quantization.wrapq.mode import Mode
23
+ from tico.quantization.wrapq.observers.base import ObserverBase
24
24
 
25
25
 
26
26
  class QuantModuleBase(nn.Module, ABC):
@@ -17,26 +17,24 @@ from typing import Callable, Dict, Type
17
17
 
18
18
  import torch.nn as nn
19
19
 
20
- from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
21
- QuantModuleBase,
22
- )
20
+ from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
23
21
 
24
22
  _WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
25
23
  _IMPORT_ONCE = False
26
24
  _CORE_MODULES = (
27
- "tico.experimental.quantization.ptq.wrappers.quant_elementwise",
28
- "tico.experimental.quantization.ptq.wrappers.nn.quant_layernorm",
29
- "tico.experimental.quantization.ptq.wrappers.nn.quant_linear",
30
- "tico.experimental.quantization.ptq.wrappers.nn.quant_silu",
25
+ "tico.quantization.wrapq.wrappers.quant_elementwise",
26
+ "tico.quantization.wrapq.wrappers.nn.quant_layernorm",
27
+ "tico.quantization.wrapq.wrappers.nn.quant_linear",
28
+ "tico.quantization.wrapq.wrappers.nn.quant_silu",
31
29
  # llama
32
- "tico.experimental.quantization.ptq.wrappers.llama.quant_attn",
33
- "tico.experimental.quantization.ptq.wrappers.llama.quant_decoder_layer",
34
- "tico.experimental.quantization.ptq.wrappers.llama.quant_mlp",
30
+ "tico.quantization.wrapq.wrappers.llama.quant_attn",
31
+ "tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
32
+ "tico.quantization.wrapq.wrappers.llama.quant_mlp",
35
33
  # fairseq
36
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_decoder_layer",
37
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder",
38
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_encoder_layer",
39
- "tico.experimental.quantization.ptq.wrappers.fairseq.quant_mha",
34
+ "tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
35
+ "tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
36
+ "tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
37
+ "tico.quantization.wrapq.wrappers.fairseq.quant_mha",
40
38
  # add future core wrappers here
41
39
  )
42
40
 
tico/utils/convert.py CHANGED
@@ -20,20 +20,6 @@ import torch
20
20
  from torch.export import export, ExportedProgram
21
21
 
22
22
  from tico.config import CompileConfigBase, get_default_config
23
- from tico.experimental.quantization.passes.fold_quant_ops import FoldQuantOps
24
- from tico.experimental.quantization.passes.insert_quantize_on_dtype_mismatch import (
25
- InsertQuantizeOnDtypeMismatch,
26
- )
27
- from tico.experimental.quantization.passes.propagate_qparam_backward import (
28
- PropagateQParamBackward,
29
- )
30
- from tico.experimental.quantization.passes.propagate_qparam_forward import (
31
- PropagateQParamForward,
32
- )
33
- from tico.experimental.quantization.passes.quantize_bias import QuantizeBias
34
- from tico.experimental.quantization.passes.remove_weight_dequant_op import (
35
- RemoveWeightDequantOp,
36
- )
37
23
  from tico.passes.cast_aten_where_arg_type import CastATenWhereArgType
38
24
  from tico.passes.cast_clamp_mixed_type_args import CastClampMixedTypeArgs
39
25
  from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
@@ -74,6 +60,14 @@ from tico.passes.remove_redundant_slice import RemoveRedundantSlice
74
60
  from tico.passes.remove_redundant_to_copy import RemoveRedundantToCopy
75
61
  from tico.passes.restore_linear import RestoreLinear
76
62
  from tico.passes.segment_index_select import SegmentIndexSelectConst
63
+ from tico.quantization.passes.fold_quant_ops import FoldQuantOps
64
+ from tico.quantization.passes.insert_quantize_on_dtype_mismatch import (
65
+ InsertQuantizeOnDtypeMismatch,
66
+ )
67
+ from tico.quantization.passes.propagate_qparam_backward import PropagateQParamBackward
68
+ from tico.quantization.passes.propagate_qparam_forward import PropagateQParamForward
69
+ from tico.quantization.passes.quantize_bias import QuantizeBias
70
+ from tico.quantization.passes.remove_weight_dequant_op import RemoveWeightDequantOp
77
71
  from tico.serialize.circle_serializer import build_circle
78
72
  from tico.serialize.operators.node_visitor import get_support_targets
79
73
  from tico.utils import logging
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tico
3
- Version: 0.1.0.dev251023
3
+ Version: 0.1.0.dev251027
4
4
  Summary: Convert exported Torch module to circle
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN