transformers-5.0.0rc0-py3-none-any.whl → transformers-5.0.0rc1-py3-none-any.whl
This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
Selected hunks follow (an ellipsis marks content that was truncated in the source extract).

transformers/integrations/ggml.py:

```diff
@@ -76,7 +76,7 @@ GGUF_CONFIG_MAPPING = {
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "vocab_size": "vocab_size",
     },
-    "…
+    "qwen2_moe": {
         "context_length": "max_position_embeddings",
         "block_count": "num_hidden_layers",
         "feed_forward_length": "intermediate_size",
@@ -313,6 +313,16 @@ GGUF_TOKENIZER_MAPPING = {
     },
 }
 
+# We only need to set here the parameters that default to different values between transformers and llamacpp.
+GGUF_CONFIG_DEFAULTS_MAPPING = {
+    "qwen3_moe": {
+        # NOTE: Qwen3MoeConfig defaults to false but llama.cpp needs this to be true.
+        # See: https://github.com/ggml-org/llama.cpp/blob/17f7f4baad8b3a716ee139da7bb56ae984e8c0fa/src/models/qwen3moe.cpp#L85-L96
+        # (the parameter right after LLM_FFN_SILU corresponds to norm_topk_prob)
+        "norm_topk_prob": True,
+    },
+}
+
 
 def _gguf_parse_value(_value, data_type):
     if not isinstance(data_type, list):
```
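The new `GGUF_CONFIG_DEFAULTS_MAPPING` only carries parameters whose defaults differ between transformers and llama.cpp. A minimal sketch of how such a table can be layered under the values actually parsed from a GGUF file; the `build_config_kwargs` helper is hypothetical and not part of the library:

```python
# Hypothetical helper, for illustration only: defaults from the table are applied first,
# then any value that is actually stored in the GGUF metadata overrides them.
GGUF_CONFIG_DEFAULTS_MAPPING = {
    "qwen3_moe": {"norm_topk_prob": True},
}


def build_config_kwargs(model_type: str, parsed_gguf_values: dict) -> dict:
    kwargs = dict(GGUF_CONFIG_DEFAULTS_MAPPING.get(model_type, {}))
    kwargs.update(parsed_gguf_values)
    return kwargs


print(build_config_kwargs("qwen3_moe", {"max_position_embeddings": 32768}))
# {'norm_topk_prob': True, 'max_position_embeddings': 32768}
```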
transformers/integrations/higgs.py:

```diff
@@ -15,17 +15,16 @@
 
 from math import sqrt
 
-from .. …
-    …
-    is_hadamard_available,
-    is_torch_available,
-)
+from ..quantizers.quantizers_utils import should_convert_module
+from ..utils import is_accelerate_available, is_flute_available, is_hadamard_available, is_torch_available, logging
 
 
+if is_accelerate_available():
+    from accelerate import init_empty_weights
+
 if is_torch_available():
     import torch
-    …
-    …
+    import torch.nn as nn
 
 if is_flute_available():
     from flute.integrations.higgs import prepare_data_transposed
@@ -34,6 +33,8 @@ if is_flute_available():
 if is_hadamard_available():
     from fast_hadamard_transform import hadamard_transform
 
+logger = logging.get_logger(__name__)
+
 
 def pad_to_block(tensor, dims, had_block_size, value=0):
     pad_dims = [0 for _ in range(2 * len(tensor.shape))]
@@ -549,70 +550,47 @@ class HiggsLinear(torch.nn.Module):
         )
 
 
-def replace_with_higgs_linear(
-    model,
-    quantization_config=None,
-    current_key_name=None,
-    has_been_replaced=False,
-    modules_to_not_convert=None,
-):
+def replace_with_higgs_linear(model, modules_to_not_convert: list[str] | None = None, quantization_config=None):
     """
-    Public method that …
-    `accelerate` is needed to use this method. Returns the converted model and a boolean that indicates if the
-    conversion has been successful or not.
+    Public method that replaces the Linear layers of the given model with HIGGS quantized layers.
 
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
+        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
+            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
+            converted.
         quantization_config (`HiggsConfig`):
             The quantization config object that contains the quantization parameters.
-        current_key_name (`list`, *optional*):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
 
-    …
-    for …
-        if …
-    …
-            # Force requires grad to False to avoid unexpected errors
-            model._modules[name].requires_grad_(False)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = replace_with_higgs_linear(
-                module,
-                quantization_config=quantization_config,
-                current_key_name=current_key_name,
-                has_been_replaced=has_been_replaced,
-                modules_to_not_convert=modules_to_not_convert,
-            )
-        # Remove the last key for recursion
-        current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    # we need this to correctly materialize the weights during quantization
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            if isinstance(module, nn.Linear):
+                new_module = HiggsLinear(
+                    module.in_features,
+                    module.out_features,
+                    bias=module.bias is not None,
+                    num_bits=quantization_config.bits,
+                    hadamard_size=quantization_config.hadamard_size,
+                    group_size=quantization_config.group_size,
+                )
+                new_module.source_cls = type(module)
+                new_module.requires_grad_(False)
+                model.set_submodule(module_name, new_module)
+                has_been_replaced = True
+
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using eetq but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+    return model
 
 
 def dequantize_higgs(model, current_key_name=None):
```
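The rewritten `replace_with_higgs_linear` drops the recursive walk and the `current_key_name`/`has_been_replaced` bookkeeping in favour of a flat pass over `named_modules()` guarded by the shared `should_convert_module` helper, splicing replacements in with `set_submodule`. A condensed, self-contained sketch of that pattern; the `should_convert_module` below is a simplified stand-in, not the exact helper from `transformers.quantizers.quantizers_utils`:

```python
import torch.nn as nn


def should_convert_module(name: str, modules_to_not_convert: list[str] | None) -> bool:
    # Simplified stand-in: skip the unnamed root module and anything matching an excluded pattern.
    if not name:
        return False
    if not modules_to_not_convert:
        return True
    return not any(
        name == pat or name.startswith(pat + ".") or pat in name.split(".")
        for pat in modules_to_not_convert
    )


def replace_linears(model: nn.Module, make_quant_linear, modules_to_not_convert=None) -> nn.Module:
    # Flat walk over named_modules(), mirroring the new HIGGS/quanto/mxfp4 helpers:
    # build the replacement, freeze it, and splice it in with set_submodule().
    has_been_replaced = False
    for module_name, module in model.named_modules():
        if not should_convert_module(module_name, modules_to_not_convert):
            continue
        if isinstance(module, nn.Linear):
            new_module = make_quant_linear(module)
            new_module.requires_grad_(False)
            model.set_submodule(module_name, new_module)
            has_been_replaced = True
    if not has_been_replaced:
        print("warning: no linear modules were replaced")
    return model


# Tiny demo with an identity "quantizer" so the sketch runs without any quantization backend.
make_identity = lambda lin: nn.Linear(lin.in_features, lin.out_features, bias=lin.bias is not None)
toy = nn.Sequential(nn.Linear(4, 4), nn.ReLU(), nn.Linear(4, 2))
replace_linears(toy, make_quant_linear=make_identity, modules_to_not_convert=["2"])
print(toy)
```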
transformers/integrations/hub_kernels.py:

```diff
@@ -78,7 +78,7 @@ try:
         )
         return lambda func: func
 
-    _KERNEL_MAPPING: dict[str, dict[Device | str, LayerRepository]] = {
+    _KERNEL_MAPPING: dict[str, dict[Device | str, LayerRepository | dict[Mode, LayerRepository]]] = {
         "MultiScaleDeformableAttention": {
             "cuda": LayerRepository(
                 repo_id="kernels-community/deformable-detr",
@@ -111,6 +111,12 @@ try:
                 layer_name="RMSNorm",
             )
         },
+        "mps": {
+            Mode.INFERENCE: LayerRepository(
+                repo_id="kernels-community/mlx_rmsnorm",
+                layer_name="RMSNorm",
+            )
+        },
         "npu": {
             Mode.INFERENCE: LayerRepository(
                 repo_id="kernels-community/liger_kernels",
@@ -253,6 +259,8 @@ except ImportError:
 
 _HUB_KERNEL_MAPPING: dict[str, dict[str, str]] = {
     "causal-conv1d": {"repo_id": "kernels-community/causal-conv1d"},
+    "mamba-ssm": {"repo_id": "kernels-community/mamba-ssm", "revision": "v0.0.4"},
+    "falcon_mamba-ssm": {"repo_id": "kernels-community/mamba-ssm", "revision": "v0.0.4"},
 }
 
 _KERNEL_MODULE_MAPPING: dict[str, ModuleType | None] = {}
@@ -328,7 +336,7 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
     if kernel_name in mapping and isinstance(mapping[kernel_name], ModuleType):
        return mapping[kernel_name]
     if kernel_name not in _HUB_KERNEL_MAPPING:
-        logger. …
+        logger.warning_once(f"Kernel {kernel_name} not found in _HUB_KERNEL_MAPPING")
         mapping[kernel_name] = None
         return None
     if _kernels_available:
@@ -336,11 +344,15 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
 
         try:
             repo_id = _HUB_KERNEL_MAPPING[kernel_name]["repo_id"]
+            revision = _HUB_KERNEL_MAPPING[kernel_name].get("revision", None)
             version = _HUB_KERNEL_MAPPING[kernel_name].get("version", None)
-            kernel = get_kernel(repo_id, version=version)
+            kernel = get_kernel(repo_id, revision=revision, version=version)
             mapping[kernel_name] = kernel
         except FileNotFoundError:
             mapping[kernel_name] = None
+        except AssertionError:
+            # Happens when torch is built without an accelerator backend; fall back to slow path.
+            mapping[kernel_name] = None
 
     else:
         # Try to import is_{kernel_name}_available from ..utils
@@ -369,6 +381,32 @@ def lazy_load_kernel(kernel_name: str, mapping: dict[str, ModuleType | None] = _
     return mapping[kernel_name]
 
 
+def use_kernelized_func(module_names: list[Callable] | Callable):
+    """
+    This decorator attaches the target function as an attribute of the module.
+    The function must already be decorated with @use_kernel_func_from_hub
+    this decorator then wraps it as an nn.Module internally.
+    When kernelize is later applied to the full model, the function can be accessed as a regular module attribute and kernelized just like any other layer.
+    The kernelization is performed in place, modifying the module directly.
+    """
+    if isinstance(module_names, Callable):
+        module_names = [module_names]
+
+    def decorator(cls):
+        orig_init = cls.__init__
+
+        def new_init(self, *args, **kwargs):
+            orig_init(self, *args, **kwargs)
+            for fn in module_names:
+                # we hardcode the name of the function to "rotary_fn" for now
+                setattr(self, "rotary_fn", fn)
+
+        cls.__init__ = new_init
+        return cls
+
+    return decorator
+
+
 __all__ = [
     "LayerRepository",
     "use_kernel_forward_from_hub",
@@ -377,4 +415,5 @@ __all__ = [
     "register_kernel_mapping_transformers",
     "replace_kernel_forward_from_hub",
     "lazy_load_kernel",
+    "use_kernelized_func",
 ]
```
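The new `use_kernelized_func` decorator attaches a function to each instance under the hard-coded `rotary_fn` attribute so that a later `kernelize` pass can swap it like any other layer. A hedged usage sketch that ignores the hub-kernel wrapping step; the `apply_rotary` helper and `ToyAttention` module are illustrative only:

```python
import torch
import torch.nn as nn

from transformers.integrations.hub_kernels import use_kernelized_func


def apply_rotary(q, k):
    # Hypothetical helper, standing in for a hub-kernelizable rotary function.
    return q, k


@use_kernelized_func(apply_rotary)
class ToyAttention(nn.Module):
    def __init__(self, dim: int):
        super().__init__()  # the patched __init__ then attaches `self.rotary_fn = apply_rotary`
        self.q_proj = nn.Linear(dim, dim)
        self.k_proj = nn.Linear(dim, dim)

    def forward(self, x):
        # `rotary_fn` is a plain instance attribute here; kernelize(model) could later replace it.
        q, k = self.rotary_fn(self.q_proj(x), self.k_proj(x))
        return q, k


layer = ToyAttention(16)
q, k = layer(torch.randn(2, 16))
```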
transformers/integrations/integration_utils.py:

```diff
@@ -26,6 +26,7 @@ import re
 import shutil
 import sys
 import tempfile
+import warnings
 from dataclasses import fields
 from enum import Enum
 from pathlib import Path
@@ -1455,6 +1456,10 @@ class NeptuneMissingConfiguration(Exception):
 class NeptuneCallback(TrainerCallback):
     """TrainerCallback that sends the logs to [Neptune](https://app.neptune.ai).
 
+    > [!WARNING]
+    > Neptune integration is deprecated and will be removed in a future version of Transformers. We recommend using
+    > other supported experiment tracking integrations.
+
     Args:
         api_token (`str`, *optional*): Neptune API token obtained upon registration.
             You can leave this argument out if you have saved your token to the `NEPTUNE_API_TOKEN` environment
@@ -1500,6 +1505,11 @@ class NeptuneCallback(TrainerCallback):
         log_checkpoints: str | None = None,
         **neptune_run_kwargs,
     ):
+        warnings.warn(
+            "The NeptuneCallback is deprecated and will be removed in a future version of Transformers. We recommend "
+            "using other supported experiment tracking integrations.",
+            FutureWarning,
+        )
         if not is_neptune_available():
             raise ValueError(
                 "NeptuneCallback requires the Neptune client library to be installed. "
```
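The Neptune integration now emits a `FutureWarning` on construction. If you still rely on `NeptuneCallback` while migrating, the notice can be silenced with a standard warnings filter (message text taken from the hunk above):

```python
import warnings

warnings.filterwarnings(
    "ignore",
    message=r"The NeptuneCallback is deprecated",
    category=FutureWarning,
)
```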
transformers/integrations/mxfp4.py:

```diff
@@ -26,10 +26,9 @@ from ..core_model_loading import ConversionOps
 if is_accelerate_available():
     from accelerate import init_empty_weights
 
-import re
 from contextlib import contextmanager
 
-from ..quantizers.quantizers_utils import get_module_from_name
+from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
 
 
 logger = logging.get_logger(__name__)
@@ -436,15 +435,6 @@ def mlp_forward(self, hidden_states):
     return routed_out, router_logits
 
 
-def should_convert_module(current_key_name, patterns):
-    current_key_name_str = ".".join(current_key_name)
-    if not any(
-        re.match(f"{key}\\.", current_key_name_str) or re.match(f"{key}", current_key_name_str) for key in patterns
-    ):
-        return True
-    return False
-
-
 def dequantize(module, param_name, param_value, target_device, dq_param_name, **kwargs):
     from ..integrations.tensor_parallel import shard_and_distribute_module
 
@@ -604,70 +594,40 @@ def swizzle_mxfp4_convertops(blocks, scales, module, proj, target_device, triton
     )
 
 
-def …
-    …
-):
-    …
+def replace_with_mxfp4_linear(model, quantization_config=None, modules_to_not_convert: list[str] | None = None):
+    """
+    Public method that replaces the expert layers of the given model with mxfp4 quantized layers.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model to convert, can be any `torch.nn.Module` instance.
+        quantization_config (`Mxfp4Config`, defaults to `None`):
+            The quantization config object that contains the quantization parameters.
+        modules_to_not_convert (`list`, *optional*, defaults to `None`):
+            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
+            converted.
+    """
+    if quantization_config.dequantize:
+        return model
+
+    from kernels import get_kernel
 
-    …
+    global triton_kernels_hub
+    triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
+
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
             continue
         if module.__class__.__name__ == "GptOssExperts" and not quantization_config.dequantize:
             with init_empty_weights():
-                model. …
+                model.set_submodule(module_name, Mxfp4GptOssExperts(model.config))
             has_been_replaced = True
         if module.__class__.__name__ == "GptOssMLP" and not quantization_config.dequantize:
             from types import MethodType
 
             module.forward = MethodType(mlp_forward, module)
-        if len(list(module.children())) > 0:
-            _, has_been_replaced = _replace_with_mxfp4_linear(
-                module,
-                modules_to_not_convert,
-                current_key_name,
-                quantization_config,
-                has_been_replaced=has_been_replaced,
-                config=config,
-            )
-        current_key_name.pop(-1)
-    return model, has_been_replaced
 
-
-def replace_with_mxfp4_linear(
-    model,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    quantization_config=None,
-    config=None,
-):
-    if quantization_config.dequantize:
-        return model
-    else:
-        from kernels import get_kernel
-
-        global triton_kernels_hub
-        triton_kernels_hub = get_kernel("kernels-community/triton_kernels")
-
-        modules_to_not_convert = ["lm_head"] if modules_to_not_convert is None else modules_to_not_convert
-
-        if quantization_config.modules_to_not_convert is not None:
-            modules_to_not_convert.extend(quantization_config.modules_to_not_convert)
-        modules_to_not_convert = list(set(modules_to_not_convert))
-        model, has_been_replaced = _replace_with_mxfp4_linear(
-            model,
-            modules_to_not_convert,
-            current_key_name,
-            quantization_config,
-            config=config,
-        )
     if not has_been_replaced:
         logger.warning(
             "You are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model."
```
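The surviving `MethodType` line in the hunk rebinds `forward` on a single module instance, which is how the MLP keeps its public signature while routing through the quantized experts. A self-contained sketch of that pattern with a toy module (illustrative only):

```python
from types import MethodType

import torch
import torch.nn as nn


class ToyMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


def patched_forward(self, x):
    # Toy replacement forward: same linear layer, output scaled by 2.
    return self.linear(x) * 2


mlp = ToyMLP()
mlp.forward = MethodType(patched_forward, mlp)  # rebinds forward on this one instance only
out = mlp(torch.randn(2, 4))
```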
transformers/integrations/peft.py:

```diff
@@ -17,6 +17,7 @@ import json
 import os
 from typing import Any, Literal
 
+from ..conversion_mapping import get_model_conversion_mapping
 from ..core_model_loading import WeightRenaming, rename_source_key
 from ..utils import (
     CONFIG_NAME,
@@ -46,26 +47,6 @@ MIN_PEFT_VERSION = "0.18.0"
 logger = logging.get_logger(__name__)
 
 
-# DO NOT MODIFY, KEPT FOR BC ONLY
-VLMS = [
-    "aria",
-    "ayavision",
-    "emu3",
-    "fuyu",
-    "gotocr2",
-    "gemma3",
-    "internvl",
-    "llava",  # all llava prefixed models fall under this check
-    "mistral3",
-    "mllama",
-    "paligemma",
-    "qwen2vl",
-    "qwen2_5_vl",
-    "videollava",
-    "vipllava",
-]
-
-
 class PeftAdapterMixin:
     """
     A class containing all functions for loading and using adapters weights that are supported in PEFT library. For
@@ -211,11 +192,10 @@ class PeftAdapterMixin:
         if any(conf.peft_type != PeftType.LORA for conf in self.peft_config.values()):
             raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
 
+        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
+        weight_conversions = get_model_conversion_mapping(self, key_mapping=key_mapping)
         # peft only supports low_cpu_mem_usage starting from v0.13.0
         peft_load_kwargs = {}
-        key_mapping = adapter_kwargs.pop("key_mapping", None) if adapter_kwargs is not None else None
-        if key_mapping is None and any(allowed_name in self.__class__.__name__.lower() for allowed_name in VLMS):
-            key_mapping = self._checkpoint_conversion_mapping
         peft_load_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage
 
         adapter_name = adapter_name if adapter_name is not None else "default"
@@ -279,9 +259,6 @@ class PeftAdapterMixin:
         )
         peft_config.inference_mode = not is_trainable
 
-        if peft_config.peft_type != PeftType.LORA:
-            raise ValueError("Hotswapping is currently only supported for LoRA, please set `hotswap=False`.")
-
         if not hotswap:
             # TODO: WE NEED TOO APPLY OUR DYNAMIC WEIGHT CONVERSION AT SOME POINT HERE!
             # Create and add fresh new adapters into the model, unless the weights are hotswapped
@@ -295,17 +272,18 @@ class PeftAdapterMixin:
 
         # We need to pre-process the state dict to remove unneeded prefixes - for backward compatibility
         renamings = []
-        if …
-            renamings = [entry for entry in …
+        if weight_conversions:
+            renamings = [entry for entry in weight_conversions if isinstance(entry, WeightRenaming)]
         processed_adapter_state_dict = {}
         prefix = "base_model.model."
+        state_dict = self.state_dict()
         for key, value in adapter_state_dict.items():
             if key.startswith(prefix):
                 new_key = key[len(prefix) :]
             else:
                 new_key = key
 
-            new_key = rename_source_key(new_key, renamings, [])[0]
+            new_key = rename_source_key(new_key, renamings, [], self.base_model_prefix, state_dict)[0]
 
             # For hotswapping, we need the adapter name to be present in the state dict keys
             if hotswap:
```
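The adapter state dict pre-processing now strips the `base_model.model.` prefix and then applies `WeightRenaming` entries collected from `get_model_conversion_mapping`, instead of consulting the removed `VLMS` allow-list. A condensed, self-contained sketch of that key rewriting; the simple prefix rule below is a hypothetical stand-in for what `rename_source_key` does with real `WeightRenaming` entries:

```python
# Toy adapter keys; the tensors are replaced by strings so the sketch runs anywhere.
adapter_state_dict = {
    "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight": "tensor-A",
    "model.layers.0.self_attn.q_proj.lora_B.weight": "tensor-B",
}

prefix = "base_model.model."
renamings = {"model.": "language_model."}  # hypothetical WeightRenaming-style rule

processed = {}
for key, value in adapter_state_dict.items():
    new_key = key[len(prefix):] if key.startswith(prefix) else key
    for old, new in renamings.items():
        if new_key.startswith(old):
            new_key = new + new_key[len(old):]
    processed[new_key] = value

print(list(processed))
# ['language_model.layers.0.self_attn.q_proj.lora_A.weight',
#  'language_model.layers.0.self_attn.q_proj.lora_B.weight']
```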
transformers/integrations/quanto.py:

```diff
@@ -12,21 +12,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .. …
+from ..core_model_loading import ConversionOps
+from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
+from ..utils import is_torch_available, logging
 
 
 if is_torch_available():
     import torch
+    import torch.nn as nn
 
 logger = logging.get_logger(__name__)
 
 
+class QuantoQuantize(ConversionOps):
+    def __init__(self, hf_quantizer):
+        self.hf_quantizer = hf_quantizer
+
+    def convert(
+        self,
+        input_dict: dict[str, list[torch.Tensor]],
+        model: torch.nn.Module | None = None,
+        full_layer_name: str | None = None,
+        missing_keys: list[str] | None = None,
+        **kwargs,
+    ) -> dict[str, torch.Tensor]:
+        _, value = tuple(input_dict.items())[0]
+        value = value[0]
+
+        from ..modeling_utils import _load_parameter_into_model
+
+        _load_parameter_into_model(model, full_layer_name, value)
+        module, _ = get_module_from_name(model, full_layer_name)
+        module.freeze()
+        module.weight.requires_grad = False
+        module._is_hf_initialized = True
+
+        # need to discard some missing keys we already updated the module in freeze.
+        module_name = full_layer_name.rsplit(".", 1)[0]
+        missing_keys.discard(f"{module_name}.weight")
+        missing_keys.discard(f"{module_name}.input_scale")
+        missing_keys.discard(f"{module_name}.output_scale")
+        return {}
+
+
 def replace_with_quanto_layers(
     model,
     quantization_config=None,
-    modules_to_not_convert=None,
-    current_key_name=None,
-    has_been_replaced=False,
+    modules_to_not_convert: list[str] | None = None,
 ):
     """
     Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
@@ -35,64 +67,50 @@ def replace_with_quanto_layers(
     Args:
         model (`torch.nn.Module`):
             The model to convert, can be any `torch.nn.Module` instance.
-        quantization_config (` …
+        quantization_config (`QuantoConfig`, defaults to `None`):
             The quantization config object that contains the quantization parameters.
         modules_to_not_convert (`list`, *optional*, defaults to `None`):
             A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
             converted.
-        current_key_name (`list`, *optional*, defaults to `None`):
-            A list that contains the current key name. This is used for recursion and should not be passed by the user.
-        has_been_replaced (`bool`, *optional*, defaults to `None`):
-            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
-            should not be passed by the user.
     """
     from accelerate import init_empty_weights
-
-    if is_optimum_quanto_available():
-        from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
+    from optimum.quanto import QLayerNorm, QLinear, qfloat8, qint2, qint4, qint8
 
     w_mapping = {"float8": qfloat8, "int8": qint8, "int4": qint4, "int2": qint2}
     a_mapping = {None: None, "float8": qfloat8, "int8": qint8}
 
-    …
-        modules_to_not_convert=modules_to_not_convert,
-        current_key_name=current_key_name,
-        has_been_replaced=has_been_replaced,
-    )
-    # Remove the last key for recursion
-    current_key_name.pop(-1)
-    return model, has_been_replaced
+    has_been_replaced = False
+    for module_name, module in model.named_modules():
+        if not should_convert_module(module_name, modules_to_not_convert):
+            continue
+        with init_empty_weights():
+            new_module = None
+            if isinstance(module, nn.Linear):
+                new_module = QLinear(
+                    in_features=module.in_features,
+                    out_features=module.out_features,
+                    bias=module.bias is not None,
+                    dtype=module.weight.dtype,
+                    weights=w_mapping[quantization_config.weights],
+                    activations=a_mapping[quantization_config.activations],
+                )
+            elif isinstance(module, torch.nn.LayerNorm) and quantization_config.activations is not None:
+                new_module = QLayerNorm(
+                    module.normalized_shape,
+                    module.eps,
+                    module.elementwise_affine,
+                    module.bias is not None,
+                    activations=a_mapping[quantization_config.activations],
+                )
        if new_module is not None:
+            has_been_replaced = True
+            model.set_submodule(module_name, new_module)
+
+    if not has_been_replaced:
+        logger.warning(
+            "You are loading your model using quanto but no linear modules were found in your model."
+            " Please double check your model architecture, or submit an issue on github if you think this is"
+            " a bug."
+        )
+
+    return model
```
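`replace_with_quanto_layers` now takes only the model, the config, and a flat exclusion list. A hedged usage sketch of the new call shape; it requires `optimum-quanto` and `accelerate`, the toy model is illustrative, and in normal use the quanto quantizer drives this during `from_pretrained` rather than being called directly:

```python
import torch.nn as nn

from transformers import QuantoConfig
from transformers.integrations.quanto import replace_with_quanto_layers

toy = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
config = QuantoConfig(weights="int8", activations=None)

# "lm_head" is the typical default exclusion; nothing in the toy model matches it.
toy = replace_with_quanto_layers(
    toy,
    quantization_config=config,
    modules_to_not_convert=["lm_head"],
)
# The replaced QLinear modules are created under init_empty_weights(), so their
# parameters live on the meta device until real checkpoint weights are loaded.
print(toy)
```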