tico 0.1.0.dev251026__py3-none-any.whl → 0.1.0.dev251028__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tico might be problematic. Click here for more details.
- tico/__init__.py +1 -1
- tico/quantization/__init__.py +6 -0
- tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +1 -1
- tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +5 -5
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
- tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
- tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
- tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
- tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
- tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
- tico/{experimental/quantization → quantization}/config/ptq.py +6 -6
- tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
- tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
- tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
- tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
- tico/{experimental/quantization → quantization}/public_interface.py +5 -5
- tico/{experimental/quantization → quantization}/quantizer.py +1 -1
- tico/{experimental/quantization → quantization}/quantizer_registry.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +4 -4
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +6 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +6 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +6 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +8 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +8 -10
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/quantizer.py +6 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +6 -10
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +12 -14
- tico/utils/convert.py +8 -14
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/METADATA +1 -1
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/RECORD +109 -109
- tico/experimental/quantization/__init__.py +0 -6
- /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
- /tico/{experimental/quantization → quantization}/config/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/config/base.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/executor/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/utils.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/examples/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/observers/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/__init__.py +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/LICENSE +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/WHEEL +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/entry_points.txt +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/top_level.txt +0 -0
|
@@ -33,16 +33,14 @@ import tqdm
|
|
|
33
33
|
from datasets import load_dataset
|
|
34
34
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
35
35
|
|
|
36
|
-
from tico.
|
|
37
|
-
from tico.
|
|
38
|
-
from tico.
|
|
39
|
-
from tico.
|
|
40
|
-
from tico.
|
|
41
|
-
from tico.
|
|
42
|
-
from tico.
|
|
43
|
-
from tico.
|
|
44
|
-
QuantModuleBase,
|
|
45
|
-
)
|
|
36
|
+
from tico.quantization import convert, prepare
|
|
37
|
+
from tico.quantization.config.gptq import GPTQConfig
|
|
38
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
39
|
+
from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
|
|
40
|
+
from tico.quantization.wrapq.utils.introspection import build_fqn_map
|
|
41
|
+
from tico.quantization.wrapq.utils.metrics import perplexity
|
|
42
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
43
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
46
44
|
|
|
47
45
|
|
|
48
46
|
# Token-budget presets for activation calibration
|
|
@@ -17,9 +17,9 @@ from typing import Optional, Tuple
|
|
|
17
17
|
|
|
18
18
|
import torch
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
20
|
+
from tico.quantization.wrapq.dtypes import DType, UINT8
|
|
21
|
+
from tico.quantization.wrapq.observers.base import ObserverBase
|
|
22
|
+
from tico.quantization.wrapq.qscheme import QScheme
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class AffineObserverBase(ObserverBase):
|
|
@@ -17,8 +17,8 @@ from typing import Optional, Tuple
|
|
|
17
17
|
|
|
18
18
|
import torch
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
20
|
+
from tico.quantization.wrapq.dtypes import DType, UINT8
|
|
21
|
+
from tico.quantization.wrapq.qscheme import QScheme
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class ObserverBase(ABC):
|
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
import torch
|
|
16
16
|
|
|
17
|
-
from tico.
|
|
18
|
-
from tico.
|
|
17
|
+
from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
|
|
18
|
+
from tico.quantization.wrapq.utils.reduce_utils import channelwise_minmax
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class EMAObserver(AffineObserverBase):
|
|
@@ -24,7 +24,7 @@ performing any statistics gathering or fake-quantization.
|
|
|
24
24
|
"""
|
|
25
25
|
import torch
|
|
26
26
|
|
|
27
|
-
from tico.
|
|
27
|
+
from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
class IdentityObserver(AffineObserverBase):
|
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
|
|
15
15
|
import torch
|
|
16
16
|
|
|
17
|
-
from tico.
|
|
18
|
-
from tico.
|
|
17
|
+
from tico.quantization.wrapq.observers.affine_base import AffineObserverBase
|
|
18
|
+
from tico.quantization.wrapq.utils.reduce_utils import channelwise_minmax
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class MinMaxObserver(AffineObserverBase):
|
|
@@ -17,14 +17,12 @@ from typing import Any, Dict, Optional
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
|
|
22
|
-
from tico.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
from tico.experimental.quantization.quantizer import BaseQuantizer
|
|
27
|
-
from tico.experimental.quantization.quantizer_registry import register_quantizer
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
22
|
+
from tico.quantization.quantizer_registry import register_quantizer
|
|
23
|
+
|
|
24
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
25
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
28
26
|
|
|
29
27
|
|
|
30
28
|
@register_quantizer(PTQConfig)
|
|
@@ -16,11 +16,9 @@ from typing import Callable, Dict, List, Optional, Tuple
|
|
|
16
16
|
|
|
17
17
|
import torch
|
|
18
18
|
|
|
19
|
-
from tico.
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
QuantModuleBase,
|
|
23
|
-
)
|
|
19
|
+
from tico.quantization.evaluation.metric import MetricCalculator
|
|
20
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
21
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
24
22
|
|
|
25
23
|
|
|
26
24
|
def build_fqn_map(root: torch.nn.Module) -> dict[torch.nn.Module, str]:
|
|
@@ -98,7 +98,8 @@ def perplexity(
|
|
|
98
98
|
|
|
99
99
|
input_ids = input_ids_full[:, begin:end]
|
|
100
100
|
target_ids = input_ids.clone()
|
|
101
|
-
|
|
101
|
+
# mask previously-scored tokens
|
|
102
|
+
target_ids[:, :-trg_len] = ignore_index # type: ignore[assignment]
|
|
102
103
|
|
|
103
104
|
with torch.no_grad():
|
|
104
105
|
outputs = model(input_ids, labels=target_ids)
|
|
@@ -106,7 +107,7 @@ def perplexity(
|
|
|
106
107
|
neg_log_likelihood = outputs.loss
|
|
107
108
|
|
|
108
109
|
# exact number of labels that contributed to loss
|
|
109
|
-
loss_tokens = (target_ids[:, 1:] != ignore_index).sum().item()
|
|
110
|
+
loss_tokens = (target_ids[:, 1:] != ignore_index).sum().item() # type: ignore[attr-defined]
|
|
110
111
|
nll_sum += neg_log_likelihood * loss_tokens
|
|
111
112
|
n_tokens += int(loss_tokens)
|
|
112
113
|
|
|
@@ -25,12 +25,10 @@ import torch
|
|
|
25
25
|
import torch.nn.functional as F
|
|
26
26
|
from torch import nn, Tensor
|
|
27
27
|
|
|
28
|
-
from tico.
|
|
29
|
-
from tico.
|
|
30
|
-
from tico.
|
|
31
|
-
|
|
32
|
-
)
|
|
33
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
28
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
29
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
30
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
31
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
34
32
|
|
|
35
33
|
|
|
36
34
|
@try_register("fairseq.models.transformer.TransformerDecoderBase")
|
tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py
RENAMED
|
@@ -23,15 +23,13 @@ from typing import Dict, Iterable, List, Optional, Tuple
|
|
|
23
23
|
import torch
|
|
24
24
|
from torch import nn, Tensor
|
|
25
25
|
|
|
26
|
-
from tico.
|
|
27
|
-
from tico.
|
|
26
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
27
|
+
from tico.quantization.wrapq.wrappers.fairseq.quant_mha import (
|
|
28
28
|
QuantFairseqMultiheadAttention,
|
|
29
29
|
)
|
|
30
|
-
from tico.
|
|
31
|
-
from tico.
|
|
32
|
-
|
|
33
|
-
)
|
|
34
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
30
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
31
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
32
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
35
33
|
|
|
36
34
|
|
|
37
35
|
@try_register("fairseq.modules.transformer_layer.TransformerDecoderLayerBase")
|
|
@@ -25,12 +25,10 @@ import torch
|
|
|
25
25
|
import torch.nn as nn
|
|
26
26
|
from torch import Tensor
|
|
27
27
|
|
|
28
|
-
from tico.
|
|
29
|
-
from tico.
|
|
30
|
-
from tico.
|
|
31
|
-
|
|
32
|
-
)
|
|
33
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
28
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
29
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
30
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
31
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
34
32
|
|
|
35
33
|
|
|
36
34
|
@try_register("fairseq.models.transformer.TransformerEncoderBase")
|
tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py
RENAMED
|
@@ -23,15 +23,13 @@ from typing import Optional
|
|
|
23
23
|
import torch.nn as nn
|
|
24
24
|
from torch import Tensor
|
|
25
25
|
|
|
26
|
-
from tico.
|
|
27
|
-
from tico.
|
|
26
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
27
|
+
from tico.quantization.wrapq.wrappers.fairseq.quant_mha import (
|
|
28
28
|
QuantFairseqMultiheadAttention,
|
|
29
29
|
)
|
|
30
|
-
from tico.
|
|
31
|
-
from tico.
|
|
32
|
-
|
|
33
|
-
)
|
|
34
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
30
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
31
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
32
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
35
33
|
|
|
36
34
|
|
|
37
35
|
@try_register("fairseq.modules.transformer_layer.TransformerEncoderLayerBase")
|
|
@@ -24,12 +24,10 @@ import torch
|
|
|
24
24
|
import torch.nn as nn
|
|
25
25
|
import torch.nn.functional as F
|
|
26
26
|
|
|
27
|
-
from tico.
|
|
28
|
-
from tico.
|
|
29
|
-
from tico.
|
|
30
|
-
|
|
31
|
-
)
|
|
32
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
27
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
28
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
29
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
30
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
33
31
|
|
|
34
32
|
|
|
35
33
|
@try_register("fairseq.modules.multihead_attention.MultiheadAttention")
|
|
@@ -17,12 +17,10 @@ from typing import Optional, Tuple
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
23
|
-
|
|
24
|
-
)
|
|
25
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
22
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
23
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
@try_register(
|
tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py
RENAMED
|
@@ -17,16 +17,12 @@ from typing import Optional, Tuple
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
from tico.
|
|
25
|
-
from tico.
|
|
26
|
-
from tico.experimental.quantization.ptq.wrappers.quant_module_base import (
|
|
27
|
-
QuantModuleBase,
|
|
28
|
-
)
|
|
29
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.wrapq.wrappers.llama.quant_attn import QuantLlamaAttention
|
|
22
|
+
from tico.quantization.wrapq.wrappers.llama.quant_mlp import QuantLlamaMLP
|
|
23
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
24
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
25
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
30
26
|
|
|
31
27
|
|
|
32
28
|
@try_register("transformers.models.llama.modeling_llama.LlamaDecoderLayer")
|
|
@@ -17,12 +17,10 @@ from typing import Optional
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
23
|
-
|
|
24
|
-
)
|
|
25
|
-
from tico.experimental.quantization.ptq.wrappers.registry import try_register
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
22
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
23
|
+
from tico.quantization.wrapq.wrappers.registry import try_register
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
@try_register("transformers.models.llama.modeling_llama.LlamaMLP")
|
|
@@ -17,13 +17,11 @@ from typing import Iterable, Optional, Tuple
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
21
|
|
|
22
|
-
from tico.
|
|
23
|
-
from tico.
|
|
24
|
-
|
|
25
|
-
)
|
|
26
|
-
from tico.experimental.quantization.ptq.wrappers.registry import register
|
|
22
|
+
from tico.quantization.wrapq.mode import Mode
|
|
23
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
24
|
+
from tico.quantization.wrapq.wrappers.registry import register
|
|
27
25
|
|
|
28
26
|
|
|
29
27
|
@register(nn.LayerNorm)
|
|
@@ -17,14 +17,12 @@ from typing import Optional
|
|
|
17
17
|
import torch.nn as nn
|
|
18
18
|
import torch.nn.functional as F
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
21
|
|
|
22
|
-
from tico.
|
|
23
|
-
from tico.
|
|
24
|
-
from tico.
|
|
25
|
-
|
|
26
|
-
)
|
|
27
|
-
from tico.experimental.quantization.ptq.wrappers.registry import register
|
|
22
|
+
from tico.quantization.wrapq.mode import Mode
|
|
23
|
+
from tico.quantization.wrapq.qscheme import QScheme
|
|
24
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
25
|
+
from tico.quantization.wrapq.wrappers.registry import register
|
|
28
26
|
|
|
29
27
|
|
|
30
28
|
@register(nn.Linear)
|
|
@@ -17,11 +17,9 @@ from typing import Optional
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
|
|
23
|
-
)
|
|
24
|
-
from tico.experimental.quantization.ptq.wrappers.registry import register
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
22
|
+
from tico.quantization.wrapq.wrappers.registry import register
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
@register(nn.SiLU)
|
|
@@ -16,11 +16,9 @@ from typing import Optional
|
|
|
16
16
|
|
|
17
17
|
import torch
|
|
18
18
|
|
|
19
|
-
from tico.
|
|
20
|
-
from tico.
|
|
21
|
-
|
|
22
|
-
)
|
|
23
|
-
from tico.experimental.quantization.ptq.wrappers.registry import lookup
|
|
19
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
20
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
21
|
+
from tico.quantization.wrapq.wrappers.registry import lookup
|
|
24
22
|
|
|
25
23
|
|
|
26
24
|
class PTQWrapper(QuantModuleBase):
|
|
@@ -17,11 +17,9 @@ from typing import Callable, Optional
|
|
|
17
17
|
import torch
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
|
|
23
|
-
)
|
|
24
|
-
from tico.experimental.quantization.ptq.wrappers.registry import register
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
22
|
+
from tico.quantization.wrapq.wrappers.registry import register
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
class QuantElementwise(QuantModuleBase):
|
|
@@ -17,10 +17,10 @@ from typing import Iterable, Optional, Tuple
|
|
|
17
17
|
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
20
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
21
21
|
|
|
22
|
-
from tico.
|
|
23
|
-
from tico.
|
|
22
|
+
from tico.quantization.wrapq.mode import Mode
|
|
23
|
+
from tico.quantization.wrapq.observers.base import ObserverBase
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class QuantModuleBase(nn.Module, ABC):
|
|
@@ -17,26 +17,24 @@ from typing import Callable, Dict, Type
|
|
|
17
17
|
|
|
18
18
|
import torch.nn as nn
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
QuantModuleBase,
|
|
22
|
-
)
|
|
20
|
+
from tico.quantization.wrapq.wrappers.quant_module_base import QuantModuleBase
|
|
23
21
|
|
|
24
22
|
_WRAPPERS: Dict[Type[nn.Module], Type[QuantModuleBase]] = {}
|
|
25
23
|
_IMPORT_ONCE = False
|
|
26
24
|
_CORE_MODULES = (
|
|
27
|
-
"tico.
|
|
28
|
-
"tico.
|
|
29
|
-
"tico.
|
|
30
|
-
"tico.
|
|
25
|
+
"tico.quantization.wrapq.wrappers.quant_elementwise",
|
|
26
|
+
"tico.quantization.wrapq.wrappers.nn.quant_layernorm",
|
|
27
|
+
"tico.quantization.wrapq.wrappers.nn.quant_linear",
|
|
28
|
+
"tico.quantization.wrapq.wrappers.nn.quant_silu",
|
|
31
29
|
# llama
|
|
32
|
-
"tico.
|
|
33
|
-
"tico.
|
|
34
|
-
"tico.
|
|
30
|
+
"tico.quantization.wrapq.wrappers.llama.quant_attn",
|
|
31
|
+
"tico.quantization.wrapq.wrappers.llama.quant_decoder_layer",
|
|
32
|
+
"tico.quantization.wrapq.wrappers.llama.quant_mlp",
|
|
35
33
|
# fairseq
|
|
36
|
-
"tico.
|
|
37
|
-
"tico.
|
|
38
|
-
"tico.
|
|
39
|
-
"tico.
|
|
34
|
+
"tico.quantization.wrapq.wrappers.fairseq.quant_decoder_layer",
|
|
35
|
+
"tico.quantization.wrapq.wrappers.fairseq.quant_encoder",
|
|
36
|
+
"tico.quantization.wrapq.wrappers.fairseq.quant_encoder_layer",
|
|
37
|
+
"tico.quantization.wrapq.wrappers.fairseq.quant_mha",
|
|
40
38
|
# add future core wrappers here
|
|
41
39
|
)
|
|
42
40
|
|
tico/utils/convert.py
CHANGED
|
@@ -20,20 +20,6 @@ import torch
|
|
|
20
20
|
from torch.export import export, ExportedProgram
|
|
21
21
|
|
|
22
22
|
from tico.config import CompileConfigBase, get_default_config
|
|
23
|
-
from tico.experimental.quantization.passes.fold_quant_ops import FoldQuantOps
|
|
24
|
-
from tico.experimental.quantization.passes.insert_quantize_on_dtype_mismatch import (
|
|
25
|
-
InsertQuantizeOnDtypeMismatch,
|
|
26
|
-
)
|
|
27
|
-
from tico.experimental.quantization.passes.propagate_qparam_backward import (
|
|
28
|
-
PropagateQParamBackward,
|
|
29
|
-
)
|
|
30
|
-
from tico.experimental.quantization.passes.propagate_qparam_forward import (
|
|
31
|
-
PropagateQParamForward,
|
|
32
|
-
)
|
|
33
|
-
from tico.experimental.quantization.passes.quantize_bias import QuantizeBias
|
|
34
|
-
from tico.experimental.quantization.passes.remove_weight_dequant_op import (
|
|
35
|
-
RemoveWeightDequantOp,
|
|
36
|
-
)
|
|
37
23
|
from tico.passes.cast_aten_where_arg_type import CastATenWhereArgType
|
|
38
24
|
from tico.passes.cast_clamp_mixed_type_args import CastClampMixedTypeArgs
|
|
39
25
|
from tico.passes.cast_mixed_type_args import CastMixedTypeArgs
|
|
@@ -74,6 +60,14 @@ from tico.passes.remove_redundant_slice import RemoveRedundantSlice
|
|
|
74
60
|
from tico.passes.remove_redundant_to_copy import RemoveRedundantToCopy
|
|
75
61
|
from tico.passes.restore_linear import RestoreLinear
|
|
76
62
|
from tico.passes.segment_index_select import SegmentIndexSelectConst
|
|
63
|
+
from tico.quantization.passes.fold_quant_ops import FoldQuantOps
|
|
64
|
+
from tico.quantization.passes.insert_quantize_on_dtype_mismatch import (
|
|
65
|
+
InsertQuantizeOnDtypeMismatch,
|
|
66
|
+
)
|
|
67
|
+
from tico.quantization.passes.propagate_qparam_backward import PropagateQParamBackward
|
|
68
|
+
from tico.quantization.passes.propagate_qparam_forward import PropagateQParamForward
|
|
69
|
+
from tico.quantization.passes.quantize_bias import QuantizeBias
|
|
70
|
+
from tico.quantization.passes.remove_weight_dequant_op import RemoveWeightDequantOp
|
|
77
71
|
from tico.serialize.circle_serializer import build_circle
|
|
78
72
|
from tico.serialize.operators.node_visitor import get_support_targets
|
|
79
73
|
from tico.utils import logging
|