tico 0.1.0.dev251026__py3-none-any.whl → 0.1.0.dev251028__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tico might be problematic. Click here for more details.
- tico/__init__.py +1 -1
- tico/quantization/__init__.py +6 -0
- tico/{experimental/quantization → quantization}/algorithm/gptq/gptq.py +1 -1
- tico/{experimental/quantization → quantization}/algorithm/gptq/quantizer.py +5 -5
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/annotator.py +6 -8
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/add.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/conv2d.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/div.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/linear.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mean.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/mul.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/relu6.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/rsqrt.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/sub.py +4 -6
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/spec.py +1 -3
- tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/utils.py +1 -1
- tico/{experimental/quantization → quantization}/algorithm/pt2e/quantizer.py +4 -4
- tico/{experimental/quantization → quantization}/algorithm/pt2e/utils.py +1 -3
- tico/{experimental/quantization → quantization}/algorithm/smoothquant/quantizer.py +6 -10
- tico/{experimental/quantization → quantization}/config/gptq.py +1 -1
- tico/{experimental/quantization → quantization}/config/pt2e.py +1 -1
- tico/{experimental/quantization → quantization}/config/ptq.py +6 -6
- tico/{experimental/quantization → quantization}/config/smoothquant.py +1 -1
- tico/{experimental/quantization → quantization}/evaluation/evaluate.py +6 -12
- tico/{experimental/quantization → quantization}/evaluation/executor/circle_executor.py +1 -3
- tico/{experimental/quantization → quantization}/evaluation/executor/triv24_executor.py +2 -4
- tico/{experimental/quantization → quantization}/public_interface.py +5 -5
- tico/{experimental/quantization → quantization}/quantizer.py +1 -1
- tico/{experimental/quantization → quantization}/quantizer_registry.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/compare_ppl.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/debug_quant_outputs.py +4 -4
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_linear.py +6 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_attn.py +6 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py +6 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_mlp.py +8 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_with_gptq.py +8 -10
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/affine_base.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/base.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/ema.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/identity.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/minmax.py +2 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/observers/mx.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/quantizer.py +6 -8
- tico/{experimental/quantization/ptq → quantization/wrapq}/utils/introspection.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/utils/metrics.py +3 -2
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/__init__.py +1 -1
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_decoder_layer.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_encoder_layer.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/quant_mha.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_attn.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_decoder_layer.py +6 -10
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/quant_mlp.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_layernorm.py +4 -6
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_linear.py +5 -7
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/quant_silu.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/ptq_wrapper.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_elementwise.py +3 -5
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/quant_module_base.py +3 -3
- tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/registry.py +12 -14
- tico/utils/convert.py +8 -14
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/METADATA +1 -1
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/RECORD +109 -109
- tico/experimental/quantization/__init__.py +0 -6
- /tico/{experimental/quantization → quantization}/algorithm/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/quant.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/gptq/utils.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/config.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/pt2e/transformation/convert_scalars_to_attrs.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/observer.py +0 -0
- /tico/{experimental/quantization → quantization}/algorithm/smoothquant/smooth_quant.py +0 -0
- /tico/{experimental/quantization → quantization}/config/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/config/base.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/backend.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/executor/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/executor/backend_executor.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/metric.py +0 -0
- /tico/{experimental/quantization → quantization}/evaluation/utils.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/__init__.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/fold_quant_ops.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/insert_quantize_on_dtype_mismatch.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/propagate_qparam_backward.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/propagate_qparam_forward.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/quantize_bias.py +0 -0
- /tico/{experimental/quantization → quantization}/passes/remove_weight_dequant_op.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/dtypes.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/examples/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/mode.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/observers/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/qscheme.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/utils/reduce_utils.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/fairseq/decoder_export_single_step.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/llama/__init__.py +0 -0
- /tico/{experimental/quantization/ptq → quantization/wrapq}/wrappers/nn/__init__.py +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/LICENSE +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/WHEEL +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/entry_points.txt +0 -0
- {tico-0.1.0.dev251026.dist-info → tico-0.1.0.dev251028.dist-info}/top_level.txt +0 -0
tico/__init__.py
CHANGED
|
@@ -25,7 +25,7 @@ from typing import Optional
|
|
|
25
25
|
import torch
|
|
26
26
|
import torch.nn as nn
|
|
27
27
|
|
|
28
|
-
from tico.experimental.quantization.algorithm.gptq.quant import quantize, Quantizer
|
|
28
|
+
from tico.quantization.algorithm.gptq.quant import quantize, Quantizer
|
|
29
29
|
|
|
30
30
|
torch.backends.cuda.matmul.allow_tf32 = False
|
|
31
31
|
torch.backends.cudnn.allow_tf32 = False
|
|
@@ -19,15 +19,15 @@ from typing import Any, Callable, Dict, List, Optional
|
|
|
19
19
|
import torch
|
|
20
20
|
from tqdm.auto import tqdm
|
|
21
21
|
|
|
22
|
-
from tico.experimental.quantization.algorithm.gptq.gptq import GPTQ
|
|
23
|
-
from tico.experimental.quantization.algorithm.gptq.utils import (
|
|
22
|
+
from tico.quantization.algorithm.gptq.gptq import GPTQ
|
|
23
|
+
from tico.quantization.algorithm.gptq.utils import (
|
|
24
24
|
find_layers,
|
|
25
25
|
gather_single_batch_from_dict,
|
|
26
26
|
gather_single_batch_from_list,
|
|
27
27
|
)
|
|
28
|
-
from tico.experimental.quantization.config.gptq import GPTQConfig
|
|
29
|
-
from tico.experimental.quantization.quantizer import BaseQuantizer
|
|
30
|
-
from tico.experimental.quantization.quantizer_registry import register_quantizer
|
|
28
|
+
from tico.quantization.config.gptq import GPTQConfig
|
|
29
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
30
|
+
from tico.quantization.quantizer_registry import register_quantizer
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class StopForward(Exception):
|
|
@@ -25,14 +25,12 @@ from torch.ao.quantization.observer import MinMaxObserver, PerChannelMinMaxObser
|
|
|
25
25
|
from torch.ao.quantization.quantizer import QuantizationSpec, Quantizer
|
|
26
26
|
from torch.ao.quantization.quantizer.utils import _get_module_name_filter
|
|
27
27
|
|
|
28
|
-
from tico.
|
|
29
|
-
import tico.
|
|
30
|
-
import tico.
|
|
31
|
-
import tico.
|
|
32
|
-
from tico.
|
|
33
|
-
|
|
34
|
-
)
|
|
35
|
-
from tico.experimental.quantization.algorithm.pt2e.transformation.convert_scalars_to_attrs import (
|
|
28
|
+
from tico.quantization.algorithm.pt2e.annotation.op import *
|
|
29
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
30
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
31
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
32
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
33
|
+
from tico.quantization.algorithm.pt2e.transformation.convert_scalars_to_attrs import (
|
|
36
34
|
convert_scalars_to_attrs,
|
|
37
35
|
)
|
|
38
36
|
|
tico/{experimental/quantization → quantization}/algorithm/pt2e/annotation/op/adaptive_avg_pool2d.py
RENAMED
|
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
|
|
|
19
19
|
import torch
|
|
20
20
|
from torch.ao.quantization.quantizer import SharedQuantizationSpec
|
|
21
21
|
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
import tico.
|
|
25
|
-
from tico.
|
|
26
|
-
QuantizationConfig,
|
|
27
|
-
)
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
23
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
24
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
25
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
28
26
|
from tico.utils.validate_args_kwargs import AdaptiveAvgPool2dArgs
|
|
29
27
|
|
|
30
28
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import AddTensorArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
|
|
|
19
19
|
import torch
|
|
20
20
|
from torch.ao.quantization.quantizer import DerivedQuantizationSpec
|
|
21
21
|
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
import tico.
|
|
25
|
-
from tico.
|
|
26
|
-
QuantizationConfig,
|
|
27
|
-
)
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
23
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
24
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
25
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
28
26
|
from tico.utils.validate_args_kwargs import Conv2DArgs
|
|
29
27
|
|
|
30
28
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import DivTensorArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -19,12 +19,10 @@ if TYPE_CHECKING:
|
|
|
19
19
|
import torch
|
|
20
20
|
from torch.ao.quantization.quantizer import DerivedQuantizationSpec
|
|
21
21
|
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
import tico.
|
|
25
|
-
from tico.
|
|
26
|
-
QuantizationConfig,
|
|
27
|
-
)
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
23
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
24
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
25
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
28
26
|
from tico.utils.validate_args_kwargs import LinearArgs
|
|
29
27
|
|
|
30
28
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import MeanDimArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import MulTensorArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import Relu6Args
|
|
28
26
|
|
|
29
27
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import RsqrtArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -18,12 +18,10 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
import tico.
|
|
22
|
-
import tico.
|
|
23
|
-
import tico.
|
|
24
|
-
from tico.
|
|
25
|
-
QuantizationConfig,
|
|
26
|
-
)
|
|
21
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
22
|
+
import tico.quantization.algorithm.pt2e.annotation.utils as annot_utils
|
|
23
|
+
import tico.quantization.algorithm.pt2e.utils as quant_utils
|
|
24
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
27
25
|
from tico.utils.validate_args_kwargs import SubTensorArgs
|
|
28
26
|
|
|
29
27
|
|
|
@@ -18,9 +18,7 @@ if TYPE_CHECKING:
|
|
|
18
18
|
import torch.fx
|
|
19
19
|
import torch
|
|
20
20
|
|
|
21
|
-
from tico.
|
|
22
|
-
QuantizationConfig,
|
|
23
|
-
)
|
|
21
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
24
22
|
|
|
25
23
|
AnnotatorType = Callable[
|
|
26
24
|
[
|
|
@@ -22,7 +22,7 @@ from torch.ao.quantization.quantizer import (
|
|
|
22
22
|
SharedQuantizationSpec,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
|
-
import tico.experimental.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
25
|
+
import tico.quantization.algorithm.pt2e.annotation.spec as annot_spec
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def annotate_input_qspec_map(node: torch.fx.Node, input_node: torch.fx.Node, qspec):
|
|
@@ -18,13 +18,13 @@ import torch
|
|
|
18
18
|
|
|
19
19
|
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
|
|
20
20
|
|
|
21
|
-
from tico.experimental.quantization.algorithm.pt2e.annotation.annotator import (
|
|
21
|
+
from tico.quantization.algorithm.pt2e.annotation.annotator import (
|
|
22
22
|
get_asymmetric_quantization_config,
|
|
23
23
|
PT2EAnnotator,
|
|
24
24
|
)
|
|
25
|
-
from tico.
|
|
26
|
-
from tico.
|
|
27
|
-
from tico.
|
|
25
|
+
from tico.quantization.config.pt2e import PT2EConfig
|
|
26
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
27
|
+
from tico.quantization.quantizer_registry import register_quantizer
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
@register_quantizer(PT2EConfig)
|
|
@@ -20,9 +20,7 @@ import torch
|
|
|
20
20
|
from torch.ao.quantization.quantizer import QuantizationSpec
|
|
21
21
|
from torch.ao.quantization.quantizer.utils import _get_module_name_filter
|
|
22
22
|
|
|
23
|
-
from tico.
|
|
24
|
-
QuantizationConfig,
|
|
25
|
-
)
|
|
23
|
+
from tico.quantization.algorithm.pt2e.annotation.config import QuantizationConfig
|
|
26
24
|
|
|
27
25
|
|
|
28
26
|
def get_module_type_filter(tp: Callable):
|
|
@@ -16,16 +16,12 @@ from typing import Any, Dict, Optional
|
|
|
16
16
|
|
|
17
17
|
import torch
|
|
18
18
|
|
|
19
|
-
from tico.
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
from tico.
|
|
24
|
-
|
|
25
|
-
)
|
|
26
|
-
from tico.experimental.quantization.config.smoothquant import SmoothQuantConfig
|
|
27
|
-
from tico.experimental.quantization.quantizer import BaseQuantizer
|
|
28
|
-
from tico.experimental.quantization.quantizer_registry import register_quantizer
|
|
19
|
+
from tico.quantization.algorithm.smoothquant.observer import ChannelwiseMaxActsObserver
|
|
20
|
+
|
|
21
|
+
from tico.quantization.algorithm.smoothquant.smooth_quant import apply_smoothing
|
|
22
|
+
from tico.quantization.config.smoothquant import SmoothQuantConfig
|
|
23
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
24
|
+
from tico.quantization.quantizer_registry import register_quantizer
|
|
29
25
|
|
|
30
26
|
|
|
31
27
|
@register_quantizer(SmoothQuantConfig)
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from tico.experimental.quantization.config.base import BaseConfig
|
|
15
|
+
from tico.quantization.config.base import BaseConfig
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class GPTQConfig(BaseConfig):
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from tico.experimental.quantization.config.base import BaseConfig
|
|
15
|
+
from tico.quantization.config.base import BaseConfig
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class PT2EConfig(BaseConfig):
|
|
@@ -15,11 +15,11 @@
|
|
|
15
15
|
from dataclasses import dataclass, field
|
|
16
16
|
from typing import Any, Dict, Mapping, Type
|
|
17
17
|
|
|
18
|
-
from tico.
|
|
19
|
-
from tico.
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
18
|
+
from tico.quantization.config.base import BaseConfig
|
|
19
|
+
from tico.quantization.wrapq.dtypes import DType
|
|
20
|
+
from tico.quantization.wrapq.observers.base import ObserverBase
|
|
21
|
+
from tico.quantization.wrapq.observers.minmax import MinMaxObserver
|
|
22
|
+
from tico.quantization.wrapq.qscheme import QScheme
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
@dataclass
|
|
@@ -55,7 +55,7 @@ class PTQConfig(BaseConfig):
|
|
|
55
55
|
Example
|
|
56
56
|
-------
|
|
57
57
|
```python
|
|
58
|
-
from
|
|
58
|
+
from wrapq.observers import PercentileObserver
|
|
59
59
|
|
|
60
60
|
cfg = PTQConfig(
|
|
61
61
|
default_dtype = DType.uint(8),
|
|
@@ -20,18 +20,12 @@ import torch
|
|
|
20
20
|
from circle_schema import circle
|
|
21
21
|
from torch.utils import _pytree as pytree
|
|
22
22
|
|
|
23
|
-
from tico.
|
|
24
|
-
from tico.
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
from tico.
|
|
28
|
-
|
|
29
|
-
)
|
|
30
|
-
from tico.experimental.quantization.evaluation.executor.triv24_executor import (
|
|
31
|
-
Triv24Executor,
|
|
32
|
-
)
|
|
33
|
-
from tico.experimental.quantization.evaluation.metric import MetricCalculator
|
|
34
|
-
from tico.experimental.quantization.evaluation.utils import (
|
|
23
|
+
from tico.quantization.evaluation.backend import BACKEND
|
|
24
|
+
from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
|
|
25
|
+
from tico.quantization.evaluation.executor.circle_executor import CircleExecutor
|
|
26
|
+
from tico.quantization.evaluation.executor.triv24_executor import Triv24Executor
|
|
27
|
+
from tico.quantization.evaluation.metric import MetricCalculator
|
|
28
|
+
from tico.quantization.evaluation.utils import (
|
|
35
29
|
ensure_list,
|
|
36
30
|
find_invalid_types,
|
|
37
31
|
get_graph_input_output,
|
|
@@ -19,9 +19,7 @@ from typing import List
|
|
|
19
19
|
import numpy as np
|
|
20
20
|
import torch
|
|
21
21
|
|
|
22
|
-
from tico.
|
|
23
|
-
BackendExecutor,
|
|
24
|
-
)
|
|
22
|
+
from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
|
|
25
23
|
from tico.utils.model import CircleModel
|
|
26
24
|
from tico.utils.utils import run_bash_cmd
|
|
27
25
|
|
|
@@ -20,10 +20,8 @@ import numpy as np
|
|
|
20
20
|
import torch
|
|
21
21
|
from circle_schema import circle
|
|
22
22
|
|
|
23
|
-
from tico.
|
|
24
|
-
|
|
25
|
-
)
|
|
26
|
-
from tico.experimental.quantization.evaluation.utils import (
|
|
23
|
+
from tico.quantization.evaluation.executor.backend_executor import BackendExecutor
|
|
24
|
+
from tico.quantization.evaluation.utils import (
|
|
27
25
|
dequantize,
|
|
28
26
|
get_graph_input_output,
|
|
29
27
|
quantize,
|
|
@@ -17,11 +17,11 @@ from typing import Any, Dict, Optional
|
|
|
17
17
|
|
|
18
18
|
import torch
|
|
19
19
|
|
|
20
|
-
from tico.experimental.quantization.algorithm.gptq.quantizer import GPTQQuantizer
|
|
21
|
-
from tico.experimental.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
|
|
22
|
-
from tico.experimental.quantization.config.base import BaseConfig
|
|
23
|
-
from tico.experimental.quantization.quantizer import BaseQuantizer
|
|
24
|
-
from tico.experimental.quantization.quantizer_registry import get_quantizer
|
|
20
|
+
from tico.quantization.algorithm.gptq.quantizer import GPTQQuantizer
|
|
21
|
+
from tico.quantization.algorithm.pt2e.quantizer import PT2EQuantizer
|
|
22
|
+
from tico.quantization.config.base import BaseConfig
|
|
23
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
24
|
+
from tico.quantization.quantizer_registry import get_quantizer
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
QUANTIZER_ATTRIBUTE_NAME = "tico_quantizer"
|
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
import importlib
|
|
16
16
|
from typing import Dict, Optional, Type, TypeVar
|
|
17
17
|
|
|
18
|
-
from tico.experimental.quantization.config.base import BaseConfig
|
|
19
|
-
from tico.experimental.quantization.quantizer import BaseQuantizer
|
|
18
|
+
from tico.quantization.config.base import BaseConfig
|
|
19
|
+
from tico.quantization.quantizer import BaseQuantizer
|
|
20
20
|
|
|
21
21
|
TQ = TypeVar("TQ", bound=BaseQuantizer)
|
|
22
22
|
|
|
@@ -54,12 +54,10 @@ def get_quantizer(cfg: BaseConfig) -> BaseQuantizer:
|
|
|
54
54
|
name = getattr(cfg, "name", None)
|
|
55
55
|
if name:
|
|
56
56
|
if name == "ptq":
|
|
57
|
-
importlib.import_module(f"tico.experimental.quantization.ptq.quantizer")
|
|
57
|
+
importlib.import_module(f"tico.quantization.wrapq.quantizer")
|
|
58
58
|
else:
|
|
59
59
|
try:
|
|
60
|
-
importlib.import_module(
|
|
61
|
-
f"tico.experimental.quantization.algorithm.{name}.quantizer"
|
|
62
|
-
)
|
|
60
|
+
importlib.import_module(f"tico.quantization.algorithm.{name}.quantizer")
|
|
63
61
|
except Exception as e:
|
|
64
62
|
raise RuntimeError(
|
|
65
63
|
f"Failed to import quantizer module for config name='{name}': {e}"
|
|
@@ -28,9 +28,9 @@ import tqdm
|
|
|
28
28
|
from datasets import load_dataset
|
|
29
29
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
30
30
|
|
|
31
|
-
from tico.
|
|
32
|
-
from tico.
|
|
33
|
-
from tico.
|
|
31
|
+
from tico.quantization import convert, prepare
|
|
32
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
33
|
+
from tico.quantization.wrapq.utils.metrics import perplexity
|
|
34
34
|
|
|
35
35
|
# Token-budget presets for activation calibration
|
|
36
36
|
TOKENS: dict[str, int] = {
|
|
@@ -38,14 +38,14 @@ import tqdm
|
|
|
38
38
|
from datasets import load_dataset
|
|
39
39
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
40
40
|
|
|
41
|
-
from tico.
|
|
42
|
-
from tico.
|
|
43
|
-
from tico.
|
|
41
|
+
from tico.quantization import convert, prepare
|
|
42
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
43
|
+
from tico.quantization.wrapq.utils.introspection import (
|
|
44
44
|
build_fqn_map,
|
|
45
45
|
compare_layer_outputs,
|
|
46
46
|
save_fp_outputs,
|
|
47
47
|
)
|
|
48
|
-
from tico.
|
|
48
|
+
from tico.quantization.wrapq.wrappers.ptq_wrapper import PTQWrapper
|
|
49
49
|
|
|
50
50
|
# Token-budget presets for activation calibration
|
|
51
51
|
TOKENS: dict[str, int] = {
|
|
@@ -29,12 +29,12 @@ import pathlib
|
|
|
29
29
|
import torch
|
|
30
30
|
import torch.nn as nn
|
|
31
31
|
|
|
32
|
-
from tico.
|
|
33
|
-
from tico.
|
|
34
|
-
from tico.
|
|
35
|
-
from tico.
|
|
36
|
-
from tico.
|
|
37
|
-
from tico.
|
|
32
|
+
from tico.quantization import convert, prepare
|
|
33
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
34
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
35
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
36
|
+
from tico.quantization.wrapq.mode import Mode
|
|
37
|
+
from tico.quantization.wrapq.wrappers.nn.quant_linear import QuantLinear
|
|
38
38
|
from tico.utils.utils import SuppressWarning
|
|
39
39
|
|
|
40
40
|
|
|
@@ -17,14 +17,12 @@ import pathlib
|
|
|
17
17
|
import torch
|
|
18
18
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
19
19
|
|
|
20
|
-
from tico.
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
23
|
-
from tico.
|
|
24
|
-
from tico.
|
|
25
|
-
from tico.
|
|
26
|
-
QuantLlamaAttention,
|
|
27
|
-
)
|
|
20
|
+
from tico.quantization import convert, prepare
|
|
21
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
22
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
23
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
24
|
+
from tico.quantization.wrapq.mode import Mode
|
|
25
|
+
from tico.quantization.wrapq.wrappers.llama.quant_attn import QuantLlamaAttention
|
|
28
26
|
from tico.utils.utils import SuppressWarning
|
|
29
27
|
|
|
30
28
|
name = "Maykeye/TinyLLama-v0"
|
tico/{experimental/quantization/ptq → quantization/wrapq}/examples/quantize_llama_decoder_layer.py
RENAMED
|
@@ -31,12 +31,12 @@ import pathlib
|
|
|
31
31
|
import torch
|
|
32
32
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
33
33
|
|
|
34
|
-
from tico.
|
|
35
|
-
from tico.
|
|
36
|
-
from tico.
|
|
37
|
-
from tico.
|
|
38
|
-
from tico.
|
|
39
|
-
from tico.
|
|
34
|
+
from tico.quantization import convert, prepare
|
|
35
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
36
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
37
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
38
|
+
from tico.quantization.wrapq.mode import Mode
|
|
39
|
+
from tico.quantization.wrapq.wrappers.llama.quant_decoder_layer import (
|
|
40
40
|
QuantLlamaDecoderLayer,
|
|
41
41
|
)
|
|
42
42
|
from tico.utils.utils import SuppressWarning
|
|
@@ -18,14 +18,14 @@ import torch
|
|
|
18
18
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
19
19
|
|
|
20
20
|
import tico
|
|
21
|
-
from tico.
|
|
22
|
-
from tico.
|
|
23
|
-
from tico.
|
|
24
|
-
from tico.
|
|
25
|
-
from tico.
|
|
26
|
-
from tico.
|
|
27
|
-
from tico.
|
|
28
|
-
from tico.
|
|
21
|
+
from tico.quantization import convert, prepare
|
|
22
|
+
from tico.quantization.config.ptq import PTQConfig
|
|
23
|
+
from tico.quantization.evaluation.metric import compute_peir
|
|
24
|
+
from tico.quantization.evaluation.utils import plot_two_outputs
|
|
25
|
+
from tico.quantization.wrapq.dtypes import INT16
|
|
26
|
+
from tico.quantization.wrapq.mode import Mode
|
|
27
|
+
from tico.quantization.wrapq.qscheme import QScheme
|
|
28
|
+
from tico.quantization.wrapq.wrappers.llama.quant_mlp import QuantLlamaMLP
|
|
29
29
|
from tico.utils.utils import SuppressWarning
|
|
30
30
|
|
|
31
31
|
name = "Maykeye/TinyLLama-v0"
|