transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0

transformers/integrations/quark.py
ADDED

```diff
@@ -0,0 +1,55 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+
+from ..core_model_loading import ConversionOps
+from ..utils import is_torch_available
+
+
+if is_torch_available():
+    import torch
+
+
+class QuarkDeserialize(ConversionOps):
+    def __init__(self, hf_quantizer):
+        self.hf_quantizer = hf_quantizer
+
+    def convert(
+        self,
+        input_dict: torch.Tensor,
+        model: Optional[torch.nn.Module] = None,
+        missing_keys: Optional[list[str]] = None,
+        full_layer_name: str | None = None,
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        # target_key should be in the form of weight_scale, bias_scale, input_scale, output_scale, weight_zero_point, bias_zero_point, input_zero_point, output_zero_point
+        target_key, value = tuple(input_dict.items())[0]
+        value = value[0] if isinstance(value, list) else value
+        # this will get the param name : weight, input, bias or output
+        param = target_key.split("_", 1)[0]
+        # quant_state should be in the form of scale, or zero_point
+        quant_state = target_key.split("_", 1)[-1]
+
+        # here we change the name for example from the form of :
+        # model.layers.0.mlp.down_proj.weight_scale to model.layers.0.mlp.down_proj.weight_quantizer.scale to fit within
+        # the QParamsLinear module of quark
+        sub_module_state = full_layer_name.rsplit(".", 1)[0] + "." + param + "_quantizer" + "." + quant_state
+
+        # since quark module was expecting keys in the form of model.layers.0.mlp.down_proj.weight_scale
+        # we need to remove it from the missing_keys list
+        missing_keys.discard(full_layer_name)
+
+        return {sub_module_state: value}
```
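
For readers skimming the new quark deserializer, a tiny standalone sketch (illustrative only, mirroring the logic shown in the diff above) of the key remapping it performs:

```python
# Minimal sketch (not part of the package): the key remapping done in QuarkDeserialize.convert.
def remap_quark_key(full_layer_name: str, target_key: str) -> str:
    param = target_key.split("_", 1)[0]         # "weight", "bias", "input" or "output"
    quant_state = target_key.split("_", 1)[-1]  # "scale" or "zero_point"
    prefix = full_layer_name.rsplit(".", 1)[0]  # drop the trailing "weight_scale" piece
    return f"{prefix}.{param}_quantizer.{quant_state}"


# "model.layers.0.mlp.down_proj.weight_scale" -> "model.layers.0.mlp.down_proj.weight_quantizer.scale"
print(remap_quark_key("model.layers.0.mlp.down_proj.weight_scale", "weight_scale"))
```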
transformers/integrations/spqr.py
CHANGED

```diff
@@ -13,110 +13,64 @@
 # limitations under the License.
 "SpQR (Sparse-Quantized Representation) integration file"
 
-from ..
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_spqr_available, is_torch_available, logging
 
 
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
 if is_torch_available():
     import torch.nn as nn
 
+logger = logging.get_logger(__name__)
+
 
-def replace_with_spqr_linear(
-    model,
-    quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
-):
+def replace_with_spqr_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with SPQR quantized layers.
 
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
-            The quantization config object that contains the quantization parameters.
-        modules_to_not_convert (`list[str]`, *optional*):
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
             A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
             converted.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
+        quantization_config (`SpQRConfig`):
+            The quantization config object that contains the quantization parameters.
     """
-    if modules_to_not_convert is None:
-        modules_to_not_convert = []
-
-    if is_accelerate_available():
-        from accelerate import init_empty_weights
     if is_spqr_available():
         from spqr_quant import QuantizedLinear
 
-            in_features = module.in_features
-            out_features = module.out_features
-
-            model._modules[name] = QuantizedLinear.create_placehodler(
-                rows=out_features,
-                cols=in_features,
-                bits=quantization_config.bits,
-                beta1=quantization_config.beta1,
-                beta2=quantization_config.beta2,
-                dense_weights_shape=dense_weights_shape,
-                row_offsets_shape=row_offsets_shape,
-                col_vals_shape=col_vals_shape,
-                in_perm_shape=in_perm_shape,
-            )
-            has_been_replaced = True
-
-            # Store the module class in case we need to transpose the weight later
-            model._modules[name].source_cls = type(module)
-            # Force requires grad to False to avoid unexpected errors
-            model._modules[name].requires_grad_(False)
-        else:
-            pass
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_spqr_linear(
-                module,
-                quantization_config=quantization_config,
-                modules_to_not_convert=modules_to_not_convert,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    # we need this to correctly materialize the weights during quantization
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                shapes = quantization_config.shapes
+
+                new_module = QuantizedLinear.create_placehodler(
+                    rows=module.out_features,
+                    cols=module.in_features,
+                    bits=quantization_config.bits,
+                    beta1=quantization_config.beta1,
+                    beta2=quantization_config.beta2,
+                    dense_weights_shape=shapes[f"{module_name}.dense_weights.shape"],
+                    row_offsets_shape=shapes[f"{module_name}.row_offsets.shape"],
+                    col_vals_shape=shapes[f"{module_name}.col_vals.shape"],
+                    in_perm_shape=shapes[f"{module_name}.in_perm.shape"],
+                )
+                # Force requires grad to False to avoid unexpected errors
+                model._modules[module_name].requires_grad_(False)
+                model.set_submodule(module_name, new_module)
+                has_been_replaced = True
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
```
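
The SpQR rewrite above (and the VPTQ rewrite further down) replaces the old recursive traversal with a single pass over `model.named_modules()` guarded by `should_convert_module`, swapping modules in place via `set_submodule`. A rough, self-contained sketch of that shape; the name-matching rule here is an assumption for illustration and is not the library's actual `should_convert_module` logic, and `nn.Identity` merely stands in for the quantized layer:

```python
import torch.nn as nn


def replace_linears(model: nn.Module, modules_to_not_convert: list[str] | None = None) -> nn.Module:
    """Illustrative only: swap every non-excluded nn.Linear for a stand-in module."""
    modules_to_not_convert = modules_to_not_convert or []
    replaced = False
    for name, module in model.named_modules():
        # Assumed exclusion rule for this sketch; the library delegates to should_convert_module().
        if name == "" or any(skip in name for skip in modules_to_not_convert):
            continue
        if isinstance(module, nn.Linear):
            model.set_submodule(name, nn.Identity())  # stand-in for the quantized replacement
            replaced = True
    if not replaced:
        print("No linear modules were replaced; check the model architecture.")
    return model


model = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 2))
replace_linears(model, modules_to_not_convert=["2"])  # keeps the last Linear ("2") intact
print(model)
```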
transformers/integrations/torchao.py
CHANGED

```diff
@@ -32,7 +32,7 @@ from ..quantizers.quantizers_utils import get_module_from_name
 
 if is_torchao_available():
     TORCHAO_VERSION = version.parse(importlib.metadata.version("torchao"))
-    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.
+    if version.parse(importlib.metadata.version("torchao")) >= version.parse("0.15.0"):
         from torchao.prototype.safetensors.safetensors_support import (
             unflatten_tensor_state_dict,
         )
@@ -210,61 +210,55 @@ class TorchAoDeserialize(ConversionOps):
     def convert(
         self,
         input_dict: dict[str, torch.Tensor],
+        source_patterns: list[str] | None = None,
         model: Optional[torch.nn.Module] = None,
         full_layer_name: str | None = None,
         missing_keys=None,
         **kwargs,
     ) -> dict[str, torch.Tensor]:
-        is_unsafe_serialization = ":" not in list(input_dict.keys())[0]
+        """
+        Consolidates tensor subclass components before reconstructing the object
+
+        For example:
+        input_dict: {
+            "_weight_qdata": torch.Tensor,
+            "_weight_scale": torch.Tensor,
+        }
+        full_layer_name: "model.layers.0.self_attn.k_proj.weight"
+
+        Given this, we reconstruct a Float8Tensor instance using the qdata and scale
+        and return it as a dictionary with the full_layer_name as the key and the recovered
+        Float8Tensor instance as the value.
+        """
+        is_unsafe_serialization = list(input_dict.keys())[0] not in source_patterns
 
         param_data = {}
+        layer_name = ".".join(full_layer_name.split(".")[:-1])
         if is_unsafe_serialization:
             if isinstance(input_dict["weight"], list):
                 weight = input_dict["weight"][0]
             else:
                 weight = input_dict["weight"]
         else:
-                param_data[f"{full_layer_name}:scale"] = input_dict["weight:scale"][0]
-            else:
-                param_data[f"{full_layer_name}:scale"] = input_dict["weight:scale"]
-
-            if is_int_4:
-                if isinstance(input_dict["weight:zero_point"], list):
-                    param_data[f"{full_layer_name}:zero_point"] = input_dict["weight:zero_point"][0]
-                else:
-                    param_data[f"{full_layer_name}:zero_point"] = input_dict["weight:zero_point"]
+            for suffix in input_dict.keys():
+                if len(input_dict[suffix]) != 1:
+                    raise ValueError(
+                        f"Expected a single tensor for {suffix} but got {len(input_dict[suffix])} tensors instead"
+                    )
+                param_data[f"{layer_name}.{suffix}"] = input_dict[suffix][0]
 
-        # If it's
-        # already done) - if it's unsafe-serialized (i.e. not safetensors), not need for anything either
+        # If it's unsafe-serialized (i.e. not safetensors), no need for anything
         if is_unsafe_serialization:
            return {full_layer_name: weight}
         # Sanity check for the new serialization format
-        elif not (TORCHAO_VERSION >= version.parse("0.
-            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.14.0` installed")
+        elif not (TORCHAO_VERSION >= version.parse("0.15.0") and is_metadata_torchao(self.hf_quantizer.metadata)):
+            raise ValueError("To use `safetensors` serialization, you should have `torchao>=0.15.0` installed")
 
+        unflattened_state_dict, leftover_state_dict = unflatten_tensor_state_dict(
+            param_data, self.hf_quantizer.metadata
+        )
+        assert not leftover_state_dict  # there should be no unprocessed tensors
+        new_param = unflattened_state_dict[full_layer_name]
 
         module, _ = get_module_from_name(model, full_layer_name)
         # Add repr to the module
```
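
To make the new safetensors path concrete, here is a small sketch of the key consolidation step described in the docstring above: component tensors keyed by suffix (e.g. `_weight_qdata`, `_weight_scale`) are regrouped under the owning layer's prefix before `unflatten_tensor_state_dict` rebuilds the tensor subclass. The tensor shapes below are made up for illustration:

```python
import torch

# Suffix-keyed components as they arrive from the checkpoint (shapes are hypothetical).
full_layer_name = "model.layers.0.self_attn.k_proj.weight"
input_dict = {
    "_weight_qdata": [torch.zeros(8, 8)],
    "_weight_scale": [torch.ones(8, 1)],
}

# Regroup every component under the owning layer's prefix, as the new convert() does.
layer_name = ".".join(full_layer_name.split(".")[:-1])
param_data = {f"{layer_name}.{suffix}": tensors[0] for suffix, tensors in input_dict.items()}

print(sorted(param_data))
# ['model.layers.0.self_attn.k_proj._weight_qdata', 'model.layers.0.self_attn.k_proj._weight_scale']
```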
transformers/integrations/vptq.py
CHANGED

```diff
@@ -13,64 +13,49 @@
 # limitations under the License.
 "VPTQ (Vector Post-Training Quantization) integration file"
 
-
-from
-from vptq import VQuantLinear
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_torch_available, logging
 
 
-)
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
+if is_torch_available():
+    import torch.nn as nn
+
+logger = logging.get_logger(__name__)
+
+
+def replace_with_vptq_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with SPQR quantized layers.
 
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
+            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
+            converted.
         quantization_config (`VptqConfig`):
             The quantization config object that contains the quantization parameters.
-        modules_to_not_convert (`list[`str`]`, *optional*, defaults to `["lm_head"]`):
-            Names of the modules to not convert in `VQuantLinear`. In practice we keep the `lm_head` in full precision
-            for numerical stability reasons.
-        current_key_name (`list`, *optional*):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
+    from vptq import VQuantLinear
 
-    if current_key_name is None:
-        current_key_name = []
-    current_key_name.append(name)
-    layer_name = ".".join(current_key_name)
-    shared_layer_config = quantization_config.shared_layer_config
-    config_for_layers = quantization_config.config_for_layers
-
-    if (
-        isinstance(module, nn.Linear)
-        and layer_name not in modules_to_not_convert
-        and ((layer_name in config_for_layers) or (current_key_name[-1] in shared_layer_config))
-    ):
-        layer_params = config_for_layers.get(layer_name, None) or shared_layer_config.get(
-            current_key_name[-1], None
-        )
+    has_been_replaced = False
+    shared_layer_config = quantization_config.shared_layer_config
+    config_for_layers = quantization_config.config_for_layers
 
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                layer_params = config_for_layers.get(module_name, None) or shared_layer_config.get(
+                    module_name.rsplit(".")[1], None
+                )
+                new_module = VQuantLinear(
+                    module.in_features,
+                    module.out_features,
                     vector_lens=layer_params["vector_lens"],
                     num_centroids=layer_params["num_centroids"],
                     num_res_centroids=layer_params["num_res_centroids"],
@@ -84,18 +69,16 @@ def replace_with_vptq_linear(
                     enable_proxy_error=False,
                     bias=module.bias is not None,
                 )
+                # Force requires grad to False to avoid unexpected errors
+                model._modules[module_name].requires_grad_(False)
+                model.set_submodule(module_name, new_module)
                 has_been_replaced = True
 
-                has_been_replaced=has_been_replaced,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
```
transformers/modelcard.py
CHANGED

```diff
@@ -23,7 +23,7 @@ from typing import Any, Optional, Union
 
 import httpx
 import yaml
-from huggingface_hub import model_info
+from huggingface_hub import is_offline_mode, model_info
 from huggingface_hub.errors import OfflineModeIsEnabled
 from huggingface_hub.utils import HFValidationError
 
@@ -50,7 +50,6 @@ from .utils import (
     MODEL_CARD_NAME,
     cached_file,
     is_datasets_available,
-    is_offline_mode,
     is_tokenizers_available,
     is_torch_available,
     logging,
```
transformers/modeling_gguf_pytorch_utils.py
CHANGED

```diff
@@ -20,6 +20,7 @@ import numpy as np
 from tqdm.auto import tqdm
 
 from .integrations import (
+    GGUF_CONFIG_DEFAULTS_MAPPING,
     GGUF_CONFIG_MAPPING,
     GGUF_TOKENIZER_MAPPING,
     _gguf_parse_value,
@@ -437,6 +438,13 @@ def load_gguf_checkpoint(gguf_checkpoint_path, return_tensors=False, model_to_lo
         all("output.weight" != tensor.name for tensor in reader.tensors) or architecture in exceptions
     )
 
+    # Set GGUF-specific default values
+    config_defaults = GGUF_CONFIG_DEFAULTS_MAPPING.get(
+        updated_architecture, GGUF_CONFIG_DEFAULTS_MAPPING.get(architecture) or {}
+    )
+    for key, value in config_defaults.items():
+        parsed_parameters["config"].setdefault(key, value)
+
     # List all key-value pairs in a columnized format
     for gguf_key, field in reader.fields.items():
         gguf_key = gguf_key.replace(architecture, updated_architecture)
```
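
Because the new GGUF defaults are applied with `setdefault`, they never override values already parsed from the GGUF metadata; a minimal illustration (the default values here are hypothetical):

```python
# Hypothetical defaults and a config dict partially filled from GGUF metadata.
config_defaults = {"rope_theta": 10000.0, "tie_word_embeddings": True}
parsed_config = {"rope_theta": 500000.0}  # already provided by the GGUF file

for key, value in config_defaults.items():
    parsed_config.setdefault(key, value)  # only fills keys that are still missing

print(parsed_config)  # {'rope_theta': 500000.0, 'tie_word_embeddings': True}
```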
transformers/modeling_rope_utils.py
CHANGED

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import math
+import warnings
 from functools import wraps
 from typing import TYPE_CHECKING, Optional, TypedDict
 
@@ -653,20 +654,26 @@ class RotaryEmbeddingConfigMixin:
         Helper to standardize the config's rope params field by ensuring the params are defined for each
         later type. For old model the fn will duplicate a single rope param in each layer type (backward compatibility)
         """
-        # Move `rope_theta` and `partial_rotary_factor` to the
+        # Move `rope_theta` and `partial_rotary_factor` to the `rope_parameters`, if not there yet
         rope_theta = getattr(self, "rope_theta", None)
         partial_rotary_factor = getattr(self, "partial_rotary_factor", None)
-        rope_parameters = self
+        rope_parameters = getattr(self, "rope_parameters", None) or {}
+        layer_types = getattr(self, "layer_types", None)
 
+        # Case 0: no RoPE params defined
+        if not (rope_parameters or rope_theta):
+            # partial_rotary_factor without rope_theta is invalid, so we don't check for it here
+            logger.warning("`standardize_rope_params` was called but no RoPE parameters were found.")
+            return
         # Case 1: RoPE param keys do not intersect with possible `layer_types` -> one global dict
-
+        elif layer_types is None or rope_parameters == {} or not set(rope_parameters.keys()).issubset(layer_types):
             rope_parameters.setdefault("rope_type", rope_parameters.get("type", "default"))
             rope_parameters.setdefault("rope_theta", rope_theta)
             if partial_rotary_factor is not None:
                 rope_parameters["partial_rotary_factor"] = partial_rotary_factor
         # Case 2: different RoPE for each layer -> several params as nested dict
         else:
-            for layer_type in
+            for layer_type in layer_types:
                 rope_parameters[layer_type].setdefault("rope_type", rope_parameters[layer_type].get("type", "default"))
                 rope_parameters[layer_type].setdefault("rope_theta", rope_theta)
                 if partial_rotary_factor is not None:
@@ -691,14 +698,14 @@ class RotaryEmbeddingConfigMixin:
 
         for rope_parameters in rope_parameters_dict.values():
             rope_type = rope_parameters.get("rope_type", rope_parameters.get("type", "default"))
-            validation_fn = getattr(self, f"_validate_{rope_type}_rope_parameters")
+            validation_fn = getattr(self, f"_validate_{rope_type}_rope_parameters", None)
             rope_parameters["rope_type"] = rope_type
 
             if validation_fn is not None:
                 validation_fn(rope_parameters, ignore_keys=ignore_keys)
             else:
                 logger.warning(
-                    f"Missing validation function
+                    f"Missing validation function in 'RotaryEmbeddingConfigMixin' for 'rope_type'='{rope_type}'"
                 )
 
     def _validate_default_rope_parameters(self, rope_parameters: dict, ignore_keys: Optional[set] = None):
@@ -913,3 +920,20 @@ class RotaryEmbeddingConfigMixin:
         unused_keys = received_keys - required_keys
         if unused_keys:
             logger.warning(f"Unrecognized keys in `rope_parameters` for 'rope_type'='{rope_type}': {unused_keys}")
+
+
+def rope_config_validation(config: RotaryEmbeddingConfigMixin, ignore_keys: Optional[set] = None):
+    """
+    This is a deprecated function.
+    It has been kept for backward compatibility with custom code models.
+    """
+    warnings.warn(
+        "`rope_config_validation` is deprecated and has been removed. "
+        "Its functionality has been moved to RotaryEmbeddingConfigMixin.validate_rope method. "
+        "PreTrainedConfig inherits this class, so please call self.validate_rope() instead. "
+        "Also, make sure to use the new rope_parameters syntax. "
+        "You can call self.standardize_rope_params() in the meantime.",
+        FutureWarning,
+    )
+    config.standardize_rope_params()
+    config.validate_rope(ignore_keys=ignore_keys)
```