transformers-5.0.0rc1-py3-none-any.whl → transformers-5.0.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +20 -1
- transformers/activations.py +1 -1
- transformers/audio_utils.py +0 -1
- transformers/cache_utils.py +17 -15
- transformers/configuration_utils.py +114 -70
- transformers/conversion_mapping.py +68 -5
- transformers/core_model_loading.py +201 -35
- transformers/dependency_versions_table.py +1 -1
- transformers/feature_extraction_utils.py +54 -22
- transformers/generation/candidate_generator.py +79 -31
- transformers/generation/configuration_utils.py +162 -122
- transformers/generation/continuous_batching/cache.py +47 -18
- transformers/generation/continuous_batching/cache_manager.py +131 -34
- transformers/generation/continuous_batching/continuous_api.py +101 -64
- transformers/generation/continuous_batching/requests.py +28 -1
- transformers/generation/continuous_batching/scheduler.py +11 -4
- transformers/generation/stopping_criteria.py +1 -1
- transformers/generation/utils.py +108 -110
- transformers/generation/watermarking.py +8 -5
- transformers/image_processing_base.py +2 -12
- transformers/image_processing_utils_fast.py +15 -4
- transformers/initialization.py +37 -0
- transformers/integrations/__init__.py +12 -0
- transformers/integrations/accelerate.py +44 -111
- transformers/integrations/aqlm.py +3 -5
- transformers/integrations/awq.py +2 -5
- transformers/integrations/bitnet.py +5 -8
- transformers/integrations/bitsandbytes.py +16 -15
- transformers/integrations/deepspeed.py +18 -3
- transformers/integrations/eetq.py +3 -5
- transformers/integrations/fbgemm_fp8.py +1 -1
- transformers/integrations/finegrained_fp8.py +6 -16
- transformers/integrations/flash_attention.py +2 -2
- transformers/integrations/higgs.py +2 -5
- transformers/integrations/hub_kernels.py +23 -5
- transformers/integrations/integration_utils.py +35 -0
- transformers/integrations/mistral.py +12 -0
- transformers/integrations/moe.py +240 -0
- transformers/integrations/mxfp4.py +4 -10
- transformers/integrations/peft.py +5 -0
- transformers/integrations/quanto.py +5 -2
- transformers/integrations/spqr.py +3 -5
- transformers/integrations/tensor_parallel.py +167 -221
- transformers/integrations/vptq.py +3 -5
- transformers/modeling_gguf_pytorch_utils.py +66 -19
- transformers/modeling_rope_utils.py +78 -81
- transformers/modeling_utils.py +583 -503
- transformers/models/__init__.py +19 -0
- transformers/models/afmoe/modeling_afmoe.py +7 -16
- transformers/models/afmoe/modular_afmoe.py +5 -13
- transformers/models/aimv2/modeling_aimv2.py +4 -0
- transformers/models/aimv2/modular_aimv2.py +4 -0
- transformers/models/albert/modeling_albert.py +3 -0
- transformers/models/align/modeling_align.py +12 -6
- transformers/models/altclip/modeling_altclip.py +7 -3
- transformers/models/apertus/modeling_apertus.py +4 -2
- transformers/models/apertus/modular_apertus.py +4 -1
- transformers/models/arcee/modeling_arcee.py +1 -1
- transformers/models/aria/modeling_aria.py +8 -4
- transformers/models/aria/modular_aria.py +7 -3
- transformers/models/audioflamingo3/processing_audioflamingo3.py +27 -22
- transformers/models/auto/auto_factory.py +1 -1
- transformers/models/auto/configuration_auto.py +27 -0
- transformers/models/auto/feature_extraction_auto.py +7 -3
- transformers/models/auto/image_processing_auto.py +4 -2
- transformers/models/auto/modeling_auto.py +31 -0
- transformers/models/auto/processing_auto.py +4 -0
- transformers/models/auto/tokenization_auto.py +132 -153
- transformers/models/auto/video_processing_auto.py +5 -2
- transformers/models/aya_vision/modeling_aya_vision.py +7 -3
- transformers/models/bamba/modeling_bamba.py +18 -19
- transformers/models/bamba/modular_bamba.py +17 -16
- transformers/models/bark/modeling_bark.py +9 -0
- transformers/models/bart/configuration_bart.py +0 -1
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/beit/image_processing_beit_fast.py +0 -1
- transformers/models/bert/modeling_bert.py +3 -0
- transformers/models/bert_generation/modeling_bert_generation.py +2 -0
- transformers/models/big_bird/modeling_big_bird.py +3 -0
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +7 -0
- transformers/models/bit/modeling_bit.py +5 -1
- transformers/models/bitnet/modeling_bitnet.py +1 -1
- transformers/models/blenderbot/modeling_blenderbot.py +7 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +6 -7
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +7 -0
- transformers/models/blip/modeling_blip.py +2 -0
- transformers/models/blip/modeling_blip_text.py +8 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -0
- transformers/models/bloom/modeling_bloom.py +13 -44
- transformers/models/blt/modeling_blt.py +162 -2
- transformers/models/blt/modular_blt.py +168 -3
- transformers/models/bridgetower/image_processing_bridgetower_fast.py +0 -2
- transformers/models/bridgetower/modeling_bridgetower.py +6 -0
- transformers/models/bros/modeling_bros.py +8 -0
- transformers/models/camembert/modeling_camembert.py +109 -106
- transformers/models/canine/modeling_canine.py +6 -0
- transformers/models/canine/tokenization_canine.py +2 -0
- transformers/models/chameleon/modeling_chameleon.py +9 -4
- transformers/models/chinese_clip/modeling_chinese_clip.py +6 -3
- transformers/models/clap/feature_extraction_clap.py +2 -2
- transformers/models/clap/modeling_clap.py +25 -15
- transformers/models/clip/modeling_clip.py +2 -0
- transformers/models/clipseg/modeling_clipseg.py +4 -0
- transformers/models/clvp/modeling_clvp.py +14 -3
- transformers/models/code_llama/tokenization_code_llama.py +1 -1
- transformers/models/codegen/modeling_codegen.py +13 -4
- transformers/models/cohere/modeling_cohere.py +1 -1
- transformers/models/cohere2/modeling_cohere2.py +1 -1
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +0 -1
- transformers/models/cohere2_vision/modeling_cohere2_vision.py +7 -3
- transformers/models/conditional_detr/configuration_conditional_detr.py +1 -1
- transformers/models/conditional_detr/modeling_conditional_detr.py +4 -1
- transformers/models/convbert/modeling_convbert.py +3 -0
- transformers/models/convnext/image_processing_convnext.py +2 -2
- transformers/models/convnext/image_processing_convnext_fast.py +9 -13
- transformers/models/csm/generation_csm.py +19 -22
- transformers/models/csm/modeling_csm.py +3 -1
- transformers/models/csm/modular_csm.py +2 -0
- transformers/models/ctrl/modeling_ctrl.py +14 -2
- transformers/models/cvt/modeling_cvt.py +5 -1
- transformers/models/cwm/modeling_cwm.py +1 -1
- transformers/models/d_fine/configuration_d_fine.py +3 -4
- transformers/models/d_fine/modeling_d_fine.py +46 -39
- transformers/models/d_fine/modular_d_fine.py +15 -4
- transformers/models/dab_detr/configuration_dab_detr.py +2 -2
- transformers/models/dab_detr/modeling_dab_detr.py +1 -1
- transformers/models/dac/modeling_dac.py +4 -4
- transformers/models/data2vec/modeling_data2vec_text.py +7 -0
- transformers/models/data2vec/modular_data2vec_text.py +7 -0
- transformers/models/dbrx/configuration_dbrx.py +9 -1
- transformers/models/dbrx/modeling_dbrx.py +1 -1
- transformers/models/deberta/modeling_deberta.py +2 -0
- transformers/models/deberta_v2/modeling_deberta_v2.py +2 -0
- transformers/models/decision_transformer/modeling_decision_transformer.py +8 -5
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +7 -4
- transformers/models/deepseek_v2/modular_deepseek_v2.py +4 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +9 -5
- transformers/models/deepseek_v3/modular_deepseek_v3.py +6 -2
- transformers/models/deepseek_vl/image_processing_deepseek_vl_fast.py +0 -1
- transformers/models/deepseek_vl/modeling_deepseek_vl.py +9 -5
- transformers/models/deepseek_vl/modular_deepseek_vl.py +3 -0
- transformers/models/deepseek_vl_hybrid/image_processing_deepseek_vl_hybrid_fast.py +0 -4
- transformers/models/deepseek_vl_hybrid/modeling_deepseek_vl_hybrid.py +9 -5
- transformers/models/deepseek_vl_hybrid/modular_deepseek_vl_hybrid.py +9 -9
- transformers/models/deformable_detr/configuration_deformable_detr.py +2 -2
- transformers/models/deformable_detr/modeling_deformable_detr.py +1 -1
- transformers/models/depth_anything/configuration_depth_anything.py +2 -3
- transformers/models/depth_pro/image_processing_depth_pro_fast.py +0 -1
- transformers/models/detr/configuration_detr.py +1 -1
- transformers/models/detr/modeling_detr.py +8 -1
- transformers/models/dia/generation_dia.py +3 -10
- transformers/models/dia/modeling_dia.py +12 -1
- transformers/models/dia/modular_dia.py +11 -0
- transformers/models/dia/processing_dia.py +1 -1
- transformers/models/diffllama/modeling_diffllama.py +3 -3
- transformers/models/diffllama/modular_diffllama.py +2 -2
- transformers/models/dinov3_vit/image_processing_dinov3_vit_fast.py +0 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +3 -0
- transformers/models/dinov3_vit/modular_dinov3_vit.py +3 -0
- transformers/models/distilbert/modeling_distilbert.py +11 -9
- transformers/models/doge/modeling_doge.py +1 -1
- transformers/models/donut/image_processing_donut_fast.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +16 -12
- transformers/models/dots1/modeling_dots1.py +14 -5
- transformers/models/dpt/configuration_dpt.py +1 -1
- transformers/models/dpt/image_processing_dpt_fast.py +1 -2
- transformers/models/dpt/modular_dpt.py +1 -2
- transformers/models/edgetam/configuration_edgetam.py +1 -1
- transformers/models/edgetam/modeling_edgetam.py +5 -2
- transformers/models/edgetam/modular_edgetam.py +15 -14
- transformers/models/edgetam_video/modeling_edgetam_video.py +55 -43
- transformers/models/edgetam_video/modular_edgetam_video.py +13 -19
- transformers/models/efficientloftr/image_processing_efficientloftr_fast.py +1 -2
- transformers/models/efficientloftr/modeling_efficientloftr.py +14 -1
- transformers/models/efficientnet/image_processing_efficientnet.py +5 -6
- transformers/models/efficientnet/image_processing_efficientnet_fast.py +1 -2
- transformers/models/efficientnet/modeling_efficientnet.py +5 -1
- transformers/models/electra/modeling_electra.py +7 -0
- transformers/models/emu3/modeling_emu3.py +8 -2
- transformers/models/emu3/modular_emu3.py +7 -1
- transformers/models/encodec/modeling_encodec.py +14 -0
- transformers/models/eomt/image_processing_eomt_fast.py +46 -14
- transformers/models/eomt/modeling_eomt.py +7 -0
- transformers/models/eomt/modular_eomt.py +7 -0
- transformers/models/ernie/modeling_ernie.py +6 -0
- transformers/models/ernie/modular_ernie.py +6 -0
- transformers/models/ernie4_5/modeling_ernie4_5.py +1 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +16 -13
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +9 -35
- transformers/models/ernie4_5_vl_moe/__init__.py +31 -0
- transformers/models/ernie4_5_vl_moe/configuration_ernie4_5_vl_moe.py +330 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe.py +456 -0
- transformers/models/ernie4_5_vl_moe/image_processing_ernie4_5_vl_moe_fast.py +232 -0
- transformers/models/ernie4_5_vl_moe/modeling_ernie4_5_vl_moe.py +1898 -0
- transformers/models/ernie4_5_vl_moe/modular_ernie4_5_vl_moe.py +1904 -0
- transformers/models/ernie4_5_vl_moe/processing_ernie4_5_vl_moe.py +251 -0
- transformers/models/ernie4_5_vl_moe/video_processing_ernie4_5_vl_moe.py +594 -0
- transformers/models/esm/modeling_esm.py +6 -0
- transformers/models/esm/modeling_esmfold.py +6 -1
- transformers/models/evolla/modeling_evolla.py +9 -1
- transformers/models/evolla/modular_evolla.py +8 -0
- transformers/models/exaone4/modeling_exaone4.py +1 -1
- transformers/models/falcon/modeling_falcon.py +3 -3
- transformers/models/falcon_h1/modeling_falcon_h1.py +28 -23
- transformers/models/falcon_h1/modular_falcon_h1.py +7 -2
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +6 -2
- transformers/models/falcon_mamba/modular_falcon_mamba.py +7 -2
- transformers/models/fast_vlm/modeling_fast_vlm.py +7 -3
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +23 -10
- transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py +1 -0
- transformers/models/flaubert/modeling_flaubert.py +14 -15
- transformers/models/flava/image_processing_flava_fast.py +0 -2
- transformers/models/flava/modeling_flava.py +4 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +7 -4
- transformers/models/florence2/modeling_florence2.py +20 -3
- transformers/models/florence2/modular_florence2.py +13 -0
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/fuyu/image_processing_fuyu.py +1 -1
- transformers/models/fuyu/modeling_fuyu.py +3 -1
- transformers/models/fuyu/processing_fuyu.py +16 -0
- transformers/models/gemma/modeling_gemma.py +10 -12
- transformers/models/gemma/modular_gemma.py +9 -11
- transformers/models/gemma2/modeling_gemma2.py +1 -1
- transformers/models/gemma2/modular_gemma2.py +1 -1
- transformers/models/gemma3/image_processing_gemma3_fast.py +0 -1
- transformers/models/gemma3/modeling_gemma3.py +28 -7
- transformers/models/gemma3/modular_gemma3.py +26 -6
- transformers/models/gemma3n/configuration_gemma3n.py +3 -0
- transformers/models/gemma3n/modeling_gemma3n.py +47 -9
- transformers/models/gemma3n/modular_gemma3n.py +51 -9
- transformers/models/git/modeling_git.py +181 -126
- transformers/models/glm/modeling_glm.py +1 -1
- transformers/models/glm4/modeling_glm4.py +1 -1
- transformers/models/glm46v/image_processing_glm46v.py +0 -4
- transformers/models/glm46v/modeling_glm46v.py +3 -1
- transformers/models/glm46v/modular_glm46v.py +3 -0
- transformers/models/glm4_moe/modeling_glm4_moe.py +9 -5
- transformers/models/glm4_moe/modular_glm4_moe.py +1 -1
- transformers/models/glm4v/image_processing_glm4v.py +0 -4
- transformers/models/glm4v/modeling_glm4v.py +15 -5
- transformers/models/glm4v/modular_glm4v.py +11 -3
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +39 -23
- transformers/models/glm4v_moe/modular_glm4v_moe.py +12 -0
- transformers/models/glmasr/__init__.py +30 -0
- transformers/models/glmasr/configuration_glmasr.py +197 -0
- transformers/models/glmasr/modeling_glmasr.py +512 -0
- transformers/models/glmasr/modular_glmasr.py +433 -0
- transformers/models/glmasr/processing_glmasr.py +332 -0
- transformers/models/glpn/image_processing_glpn_fast.py +0 -1
- transformers/models/got_ocr2/image_processing_got_ocr2_fast.py +0 -1
- transformers/models/got_ocr2/modeling_got_ocr2.py +8 -3
- transformers/models/gpt2/modeling_gpt2.py +8 -5
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +3 -8
- transformers/models/gpt_neo/modeling_gpt_neo.py +15 -3
- transformers/models/gpt_neox/modeling_gpt_neox.py +1 -1
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +1 -1
- transformers/models/gpt_oss/configuration_gpt_oss.py +17 -0
- transformers/models/gpt_oss/modeling_gpt_oss.py +6 -9
- transformers/models/gpt_oss/modular_gpt_oss.py +5 -7
- transformers/models/gptj/modeling_gptj.py +15 -6
- transformers/models/granite/modeling_granite.py +1 -1
- transformers/models/granite_speech/modeling_granite_speech.py +15 -1
- transformers/models/granitemoe/modeling_granitemoe.py +2 -3
- transformers/models/granitemoe/modular_granitemoe.py +1 -2
- transformers/models/granitemoehybrid/configuration_granitemoehybrid.py +4 -0
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +33 -23
- transformers/models/granitemoehybrid/modular_granitemoehybrid.py +12 -2
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +2 -3
- transformers/models/grounding_dino/configuration_grounding_dino.py +2 -3
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -4
- transformers/models/groupvit/modeling_groupvit.py +6 -1
- transformers/models/helium/modeling_helium.py +1 -1
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +10 -0
- transformers/models/hgnet_v2/modular_hgnet_v2.py +10 -0
- transformers/models/hubert/modeling_hubert.py +4 -0
- transformers/models/hubert/modular_hubert.py +4 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_dense/modular_hunyuan_v1_dense.py +1 -1
- transformers/models/hunyuan_v1_moe/__init__.py +1 -1
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +12 -4
- transformers/models/hunyuan_v1_moe/modular_hunyuan_v1_moe.py +4 -2
- transformers/models/ibert/modeling_ibert.py +16 -0
- transformers/models/idefics/modeling_idefics.py +10 -0
- transformers/models/idefics2/modeling_idefics2.py +7 -1
- transformers/models/idefics3/modeling_idefics3.py +5 -1
- transformers/models/imagegpt/image_processing_imagegpt_fast.py +1 -5
- transformers/models/imagegpt/modeling_imagegpt.py +9 -2
- transformers/models/instructblip/modeling_instructblip.py +2 -0
- transformers/models/instructblipvideo/modeling_instructblipvideo.py +52 -50
- transformers/models/instructblipvideo/video_processing_instructblipvideo.py +0 -1
- transformers/models/internvl/modeling_internvl.py +11 -8
- transformers/models/internvl/modular_internvl.py +5 -9
- transformers/models/internvl/video_processing_internvl.py +0 -1
- transformers/models/jais2/__init__.py +27 -0
- transformers/models/jais2/configuration_jais2.py +152 -0
- transformers/models/jais2/modeling_jais2.py +486 -0
- transformers/models/jais2/modular_jais2.py +196 -0
- transformers/models/jamba/modeling_jamba.py +24 -19
- transformers/models/jamba/modular_jamba.py +17 -17
- transformers/models/janus/image_processing_janus_fast.py +0 -1
- transformers/models/janus/modeling_janus.py +15 -7
- transformers/models/janus/modular_janus.py +16 -7
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/jetmoe/modular_jetmoe.py +1 -0
- transformers/models/kosmos2/modeling_kosmos2.py +14 -2
- transformers/models/kosmos2_5/image_processing_kosmos2_5_fast.py +2 -2
- transformers/models/kosmos2_5/modeling_kosmos2_5.py +10 -1
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +9 -3
- transformers/models/kyutai_speech_to_text/modular_kyutai_speech_to_text.py +9 -1
- transformers/models/lasr/configuration_lasr.py +4 -0
- transformers/models/lasr/modeling_lasr.py +3 -2
- transformers/models/lasr/modular_lasr.py +8 -1
- transformers/models/lasr/processing_lasr.py +0 -2
- transformers/models/layoutlm/modeling_layoutlm.py +5 -3
- transformers/models/layoutlmv2/image_processing_layoutlmv2_fast.py +0 -1
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +12 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +1 -0
- transformers/models/layoutlmv3/image_processing_layoutlmv3_fast.py +0 -1
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +29 -5
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +18 -0
- transformers/models/lfm2/modeling_lfm2.py +1 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +14 -4
- transformers/models/lfm2_moe/modular_lfm2_moe.py +5 -28
- transformers/models/lfm2_vl/configuration_lfm2_vl.py +4 -0
- transformers/models/lfm2_vl/modeling_lfm2_vl.py +11 -5
- transformers/models/lfm2_vl/modular_lfm2_vl.py +4 -2
- transformers/models/lfm2_vl/processing_lfm2_vl.py +82 -42
- transformers/models/lightglue/image_processing_lightglue_fast.py +1 -2
- transformers/models/lilt/modeling_lilt.py +19 -15
- transformers/models/llama/modeling_llama.py +1 -1
- transformers/models/llama4/image_processing_llama4_fast.py +1 -2
- transformers/models/llama4/modeling_llama4.py +8 -4
- transformers/models/llava/image_processing_llava_fast.py +0 -1
- transformers/models/llava/modeling_llava.py +12 -7
- transformers/models/llava_next/image_processing_llava_next_fast.py +0 -1
- transformers/models/llava_next/modeling_llava_next.py +7 -3
- transformers/models/llava_next_video/modeling_llava_next_video.py +7 -3
- transformers/models/llava_next_video/modular_llava_next_video.py +7 -3
- transformers/models/llava_onevision/image_processing_llava_onevision_fast.py +0 -1
- transformers/models/llava_onevision/modeling_llava_onevision.py +7 -3
- transformers/models/llava_onevision/modular_llava_onevision.py +7 -4
- transformers/models/longcat_flash/modeling_longcat_flash.py +2 -1
- transformers/models/longcat_flash/modular_longcat_flash.py +1 -0
- transformers/models/longt5/modeling_longt5.py +0 -4
- transformers/models/m2m_100/modeling_m2m_100.py +10 -0
- transformers/models/mamba/modeling_mamba.py +2 -1
- transformers/models/mamba2/modeling_mamba2.py +24 -23
- transformers/models/marian/configuration_marian.py +1 -1
- transformers/models/marian/modeling_marian.py +3 -0
- transformers/models/markuplm/modeling_markuplm.py +5 -8
- transformers/models/mask2former/configuration_mask2former.py +3 -3
- transformers/models/mask2former/image_processing_mask2former_fast.py +1 -4
- transformers/models/mask2former/modeling_mask2former.py +9 -0
- transformers/models/maskformer/configuration_maskformer.py +3 -3
- transformers/models/maskformer/image_processing_maskformer_fast.py +1 -4
- transformers/models/maskformer/modeling_maskformer.py +9 -1
- transformers/models/maskformer/modeling_maskformer_swin.py +19 -15
- transformers/models/mbart/configuration_mbart.py +1 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/megatron_bert/modeling_megatron_bert.py +2 -0
- transformers/models/metaclip_2/modeling_metaclip_2.py +2 -0
- transformers/models/metaclip_2/modular_metaclip_2.py +2 -0
- transformers/models/mimi/modeling_mimi.py +25 -4
- transformers/models/minimax/modeling_minimax.py +16 -3
- transformers/models/minimax/modular_minimax.py +12 -1
- transformers/models/ministral/modeling_ministral.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +1 -1
- transformers/models/mistral/modeling_mistral.py +1 -1
- transformers/models/mistral3/modeling_mistral3.py +10 -4
- transformers/models/mistral3/modular_mistral3.py +3 -1
- transformers/models/mixtral/modeling_mixtral.py +12 -4
- transformers/models/mixtral/modular_mixtral.py +6 -2
- transformers/models/mlcd/modeling_mlcd.py +6 -0
- transformers/models/mlcd/modular_mlcd.py +4 -0
- transformers/models/mllama/modeling_mllama.py +13 -2
- transformers/models/mm_grounding_dino/configuration_mm_grounding_dino.py +1 -2
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -4
- transformers/models/mm_grounding_dino/modular_mm_grounding_dino.py +1 -2
- transformers/models/mobilebert/modeling_mobilebert.py +2 -0
- transformers/models/mobilenet_v2/image_processing_mobilenet_v2_fast.py +0 -1
- transformers/models/mobilevit/image_processing_mobilevit.py +5 -5
- transformers/models/mobilevit/image_processing_mobilevit_fast.py +1 -2
- transformers/models/mobilevit/modeling_mobilevit.py +4 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +4 -0
- transformers/models/modernbert/modeling_modernbert.py +12 -1
- transformers/models/modernbert/modular_modernbert.py +12 -1
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +9 -1
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +9 -1
- transformers/models/moonshine/modeling_moonshine.py +1 -1
- transformers/models/moshi/modeling_moshi.py +21 -51
- transformers/models/mpnet/modeling_mpnet.py +2 -0
- transformers/models/mra/modeling_mra.py +4 -1
- transformers/models/mt5/configuration_mt5.py +2 -3
- transformers/models/mt5/modeling_mt5.py +0 -10
- transformers/models/musicgen/modeling_musicgen.py +5 -9
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +4 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +1 -1
- transformers/models/nemotron/modeling_nemotron.py +3 -3
- transformers/models/nllb_moe/configuration_nllb_moe.py +1 -0
- transformers/models/nllb_moe/modeling_nllb_moe.py +10 -0
- transformers/models/nougat/image_processing_nougat_fast.py +0 -1
- transformers/models/nougat/tokenization_nougat.py +11 -16
- transformers/models/nystromformer/modeling_nystromformer.py +7 -0
- transformers/models/olmo/modeling_olmo.py +1 -1
- transformers/models/olmo2/modeling_olmo2.py +1 -1
- transformers/models/olmo3/modeling_olmo3.py +1 -1
- transformers/models/olmoe/modeling_olmoe.py +12 -4
- transformers/models/olmoe/modular_olmoe.py +4 -2
- transformers/models/omdet_turbo/configuration_omdet_turbo.py +2 -2
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +4 -0
- transformers/models/oneformer/configuration_oneformer.py +3 -3
- transformers/models/oneformer/modeling_oneformer.py +7 -38
- transformers/models/openai/modeling_openai.py +12 -0
- transformers/models/ovis2/image_processing_ovis2_fast.py +0 -1
- transformers/models/ovis2/modeling_ovis2.py +15 -3
- transformers/models/ovis2/modular_ovis2.py +8 -0
- transformers/models/owlv2/image_processing_owlv2_fast.py +0 -2
- transformers/models/owlv2/modeling_owlv2.py +7 -3
- transformers/models/owlv2/modular_owlv2.py +0 -2
- transformers/models/owlvit/modeling_owlvit.py +7 -3
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +3 -2
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +28 -14
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +22 -12
- transformers/models/paligemma/modeling_paligemma.py +25 -17
- transformers/models/parakeet/modeling_parakeet.py +5 -0
- transformers/models/parakeet/modular_parakeet.py +5 -0
- transformers/models/parakeet/{tokenization_parakeet_fast.py → tokenization_parakeet.py} +3 -3
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +4 -0
- transformers/models/patchtst/modeling_patchtst.py +5 -4
- transformers/models/pe_audio/__init__.py +30 -0
- transformers/models/pe_audio/configuration_pe_audio.py +206 -0
- transformers/models/pe_audio/feature_extraction_pe_audio.py +162 -0
- transformers/models/pe_audio/modeling_pe_audio.py +820 -0
- transformers/models/pe_audio/modular_pe_audio.py +299 -0
- transformers/models/pe_audio/processing_pe_audio.py +24 -0
- transformers/models/pe_audio_video/__init__.py +29 -0
- transformers/models/pe_audio_video/configuration_pe_audio_video.py +225 -0
- transformers/models/pe_audio_video/modeling_pe_audio_video.py +972 -0
- transformers/models/pe_audio_video/modular_pe_audio_video.py +764 -0
- transformers/models/pe_audio_video/processing_pe_audio_video.py +25 -0
- transformers/models/pe_video/__init__.py +30 -0
- transformers/models/pe_video/configuration_pe_video.py +211 -0
- transformers/models/pe_video/modeling_pe_video.py +636 -0
- transformers/models/pe_video/modular_pe_video.py +219 -0
- transformers/models/pe_video/processing_pe_video.py +10 -0
- transformers/models/pe_video/video_processing_pe_video.py +66 -0
- transformers/models/pegasus/configuration_pegasus.py +1 -0
- transformers/models/pegasus/modeling_pegasus.py +3 -0
- transformers/models/pegasus_x/modeling_pegasus_x.py +1 -0
- transformers/models/perceiver/image_processing_perceiver_fast.py +0 -1
- transformers/models/perceiver/modeling_perceiver.py +5 -1
- transformers/models/perception_lm/image_processing_perception_lm_fast.py +0 -1
- transformers/models/perception_lm/modeling_perception_lm.py +7 -3
- transformers/models/perception_lm/modular_perception_lm.py +7 -3
- transformers/models/persimmon/modeling_persimmon.py +1 -1
- transformers/models/phi/modeling_phi.py +1 -1
- transformers/models/phi3/modeling_phi3.py +1 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +4 -1
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +3 -0
- transformers/models/phi4_multimodal/processing_phi4_multimodal.py +0 -2
- transformers/models/phimoe/modeling_phimoe.py +12 -4
- transformers/models/phimoe/modular_phimoe.py +1 -1
- transformers/models/pix2struct/processing_pix2struct.py +0 -4
- transformers/models/pixio/__init__.py +30 -0
- transformers/models/pixio/configuration_pixio.py +151 -0
- transformers/models/pixio/modeling_pixio.py +507 -0
- transformers/models/pixio/modular_pixio.py +404 -0
- transformers/models/pixtral/modeling_pixtral.py +1 -1
- transformers/models/pixtral/processing_pixtral.py +3 -1
- transformers/models/plbart/configuration_plbart.py +1 -0
- transformers/models/plbart/modeling_plbart.py +7 -0
- transformers/models/plbart/modular_plbart.py +6 -0
- transformers/models/poolformer/image_processing_poolformer_fast.py +0 -1
- transformers/models/poolformer/modeling_poolformer.py +11 -1
- transformers/models/pop2piano/configuration_pop2piano.py +0 -1
- transformers/models/prompt_depth_anything/configuration_prompt_depth_anything.py +2 -3
- transformers/models/prophetnet/modeling_prophetnet.py +2 -1
- transformers/models/qwen2/modeling_qwen2.py +1 -1
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +104 -64
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +58 -18
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +18 -5
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +26 -22
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -2
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +12 -4
- transformers/models/qwen2_vl/image_processing_qwen2_vl.py +3 -2
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +17 -4
- transformers/models/qwen3/modeling_qwen3.py +1 -1
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +12 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -6
- transformers/models/qwen3_omni_moe/configuration_qwen3_omni_moe.py +4 -0
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +92 -46
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +48 -4
- transformers/models/qwen3_vl/configuration_qwen3_vl.py +5 -5
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +17 -4
- transformers/models/qwen3_vl/modular_qwen3_vl.py +21 -10
- transformers/models/qwen3_vl/processing_qwen3_vl.py +3 -3
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +94 -112
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +32 -81
- transformers/models/rag/configuration_rag.py +0 -8
- transformers/models/rag/modeling_rag.py +7 -9
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +3 -2
- transformers/models/reformer/modeling_reformer.py +9 -1
- transformers/models/regnet/modeling_regnet.py +4 -0
- transformers/models/rembert/modeling_rembert.py +7 -1
- transformers/models/resnet/modeling_resnet.py +8 -3
- transformers/models/roberta/modeling_roberta.py +3 -0
- transformers/models/roberta/modular_roberta.py +3 -0
- transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py +3 -0
- transformers/models/roc_bert/modeling_roc_bert.py +3 -0
- transformers/models/rt_detr/configuration_rt_detr.py +1 -1
- transformers/models/rt_detr/modeling_rt_detr.py +4 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +8 -3
- transformers/models/rt_detr_v2/configuration_rt_detr_v2.py +2 -3
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +7 -0
- transformers/models/rt_detr_v2/modular_rt_detr_v2.py +8 -3
- transformers/models/rwkv/modeling_rwkv.py +1 -1
- transformers/models/sam/configuration_sam.py +1 -0
- transformers/models/sam/image_processing_sam_fast.py +0 -1
- transformers/models/sam/modeling_sam.py +4 -1
- transformers/models/sam2/configuration_sam2.py +1 -1
- transformers/models/sam2/modeling_sam2.py +5 -1
- transformers/models/sam2/modular_sam2.py +5 -1
- transformers/models/sam2_video/modeling_sam2_video.py +51 -43
- transformers/models/sam2_video/modular_sam2_video.py +31 -18
- transformers/models/sam3/configuration_sam3.py +21 -1
- transformers/models/sam3/modeling_sam3.py +23 -0
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker/modular_sam3_tracker.py +2 -0
- transformers/models/sam3_tracker_video/configuration_sam3_tracker_video.py +25 -0
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +26 -15
- transformers/models/sam3_tracker_video/modular_sam3_tracker_video.py +25 -2
- transformers/models/sam3_video/configuration_sam3_video.py +14 -0
- transformers/models/sam3_video/modeling_sam3_video.py +3 -3
- transformers/models/sam3_video/processing_sam3_video.py +1 -1
- transformers/models/sam_hq/configuration_sam_hq.py +1 -0
- transformers/models/sam_hq/modeling_sam_hq.py +26 -23
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +27 -11
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +6 -0
- transformers/models/seed_oss/modeling_seed_oss.py +1 -1
- transformers/models/segformer/image_processing_segformer_fast.py +0 -1
- transformers/models/segformer/modeling_segformer.py +2 -2
- transformers/models/segformer/modular_segformer.py +0 -1
- transformers/models/shieldgemma2/modeling_shieldgemma2.py +1 -0
- transformers/models/siglip/modeling_siglip.py +24 -2
- transformers/models/siglip2/modeling_siglip2.py +63 -41
- transformers/models/smollm3/modeling_smollm3.py +1 -1
- transformers/models/smolvlm/modeling_smolvlm.py +5 -1
- transformers/models/smolvlm/video_processing_smolvlm.py +0 -1
- transformers/models/speech_to_text/modeling_speech_to_text.py +10 -0
- transformers/models/speecht5/modeling_speecht5.py +28 -0
- transformers/models/splinter/modeling_splinter.py +9 -3
- transformers/models/squeezebert/modeling_squeezebert.py +2 -0
- transformers/models/stablelm/modeling_stablelm.py +1 -1
- transformers/models/starcoder2/modeling_starcoder2.py +1 -1
- transformers/models/superglue/image_processing_superglue_fast.py +1 -2
- transformers/models/superpoint/image_processing_superpoint_fast.py +1 -2
- transformers/models/swiftformer/modeling_swiftformer.py +4 -0
- transformers/models/swin/modeling_swin.py +16 -12
- transformers/models/swin2sr/image_processing_swin2sr_fast.py +0 -1
- transformers/models/swin2sr/modeling_swin2sr.py +49 -33
- transformers/models/swinv2/modeling_swinv2.py +41 -33
- transformers/models/switch_transformers/modeling_switch_transformers.py +2 -8
- transformers/models/switch_transformers/modular_switch_transformers.py +2 -8
- transformers/models/t5/configuration_t5.py +7 -1
- transformers/models/t5/modeling_t5.py +1 -7
- transformers/models/t5gemma/modeling_t5gemma.py +1 -1
- transformers/models/t5gemma2/configuration_t5gemma2.py +6 -42
- transformers/models/t5gemma2/modeling_t5gemma2.py +13 -4
- transformers/models/t5gemma2/modular_t5gemma2.py +289 -4
- transformers/models/table_transformer/configuration_table_transformer.py +1 -1
- transformers/models/table_transformer/modeling_table_transformer.py +1 -1
- transformers/models/textnet/image_processing_textnet_fast.py +0 -1
- transformers/models/timesfm/modeling_timesfm.py +12 -0
- transformers/models/timesfm/modular_timesfm.py +12 -0
- transformers/models/timm_backbone/modeling_timm_backbone.py +13 -9
- transformers/models/timm_wrapper/configuration_timm_wrapper.py +3 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +19 -13
- transformers/models/trocr/modeling_trocr.py +1 -2
- transformers/models/tvp/configuration_tvp.py +5 -1
- transformers/models/tvp/modeling_tvp.py +4 -4
- transformers/models/udop/configuration_udop.py +1 -0
- transformers/models/udop/modeling_udop.py +3 -7
- transformers/models/umt5/configuration_umt5.py +2 -2
- transformers/models/umt5/modeling_umt5.py +0 -6
- transformers/models/vaultgemma/modeling_vaultgemma.py +1 -1
- transformers/models/video_llama_3/image_processing_video_llama_3.py +3 -2
- transformers/models/video_llama_3/modeling_video_llama_3.py +12 -1
- transformers/models/video_llama_3/modular_video_llama_3.py +10 -1
- transformers/models/video_llava/modeling_video_llava.py +7 -3
- transformers/models/vilt/configuration_vilt.py +2 -2
- transformers/models/vilt/modeling_vilt.py +7 -0
- transformers/models/vipllava/modeling_vipllava.py +7 -3
- transformers/models/visual_bert/modeling_visual_bert.py +2 -0
- transformers/models/vitmatte/configuration_vitmatte.py +1 -1
- transformers/models/vitmatte/image_processing_vitmatte_fast.py +0 -1
- transformers/models/vitmatte/modeling_vitmatte.py +4 -0
- transformers/models/vitpose/configuration_vitpose.py +1 -1
- transformers/models/vitpose/image_processing_vitpose_fast.py +0 -1
- transformers/models/voxtral/modeling_voxtral.py +2 -2
- transformers/models/voxtral/modular_voxtral.py +2 -2
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +16 -10
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +7 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +21 -11
- transformers/models/wav2vec2_conformer/modular_wav2vec2_conformer.py +21 -11
- transformers/models/whisper/generation_whisper.py +1 -0
- transformers/models/whisper/modeling_whisper.py +5 -3
- transformers/models/x_clip/modeling_x_clip.py +2 -0
- transformers/models/xcodec/modeling_xcodec.py +5 -0
- transformers/models/xglm/modeling_xglm.py +10 -0
- transformers/models/xlm/modeling_xlm.py +13 -14
- transformers/models/xlm_roberta/modeling_xlm_roberta.py +109 -106
- transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +3 -0
- transformers/models/xlnet/modeling_xlnet.py +3 -1
- transformers/models/xmod/modeling_xmod.py +3 -0
- transformers/models/yoso/modeling_yoso.py +4 -1
- transformers/models/zamba/modeling_zamba.py +2 -1
- transformers/models/zamba2/modeling_zamba2.py +3 -2
- transformers/models/zoedepth/configuration_zoedepth.py +1 -1
- transformers/models/zoedepth/image_processing_zoedepth_fast.py +1 -3
- transformers/models/zoedepth/modeling_zoedepth.py +7 -0
- transformers/pipelines/__init__.py +9 -6
- transformers/pipelines/automatic_speech_recognition.py +20 -12
- transformers/pipelines/base.py +1 -1
- transformers/pipelines/document_question_answering.py +1 -1
- transformers/pipelines/question_answering.py +1 -1
- transformers/pipelines/text_to_audio.py +2 -2
- transformers/processing_utils.py +127 -56
- transformers/quantizers/auto.py +2 -4
- transformers/quantizers/base.py +9 -64
- transformers/quantizers/quantizer_aqlm.py +1 -18
- transformers/quantizers/quantizer_auto_round.py +1 -10
- transformers/quantizers/quantizer_awq.py +3 -8
- transformers/quantizers/quantizer_bitnet.py +1 -6
- transformers/quantizers/quantizer_bnb_4bit.py +9 -49
- transformers/quantizers/quantizer_bnb_8bit.py +9 -19
- transformers/quantizers/quantizer_compressed_tensors.py +1 -4
- transformers/quantizers/quantizer_eetq.py +2 -12
- transformers/quantizers/quantizer_fbgemm_fp8.py +5 -14
- transformers/quantizers/quantizer_finegrained_fp8.py +15 -10
- transformers/quantizers/quantizer_fp_quant.py +4 -4
- transformers/quantizers/quantizer_gptq.py +1 -4
- transformers/quantizers/quantizer_higgs.py +2 -6
- transformers/quantizers/quantizer_mxfp4.py +2 -28
- transformers/quantizers/quantizer_quanto.py +14 -14
- transformers/quantizers/quantizer_spqr.py +3 -8
- transformers/quantizers/quantizer_torchao.py +28 -124
- transformers/quantizers/quantizer_vptq.py +1 -10
- transformers/testing_utils.py +28 -12
- transformers/tokenization_mistral_common.py +3 -2
- transformers/tokenization_utils_base.py +3 -2
- transformers/tokenization_utils_tokenizers.py +25 -2
- transformers/trainer.py +24 -2
- transformers/trainer_callback.py +8 -0
- transformers/trainer_seq2seq.py +4 -0
- transformers/training_args.py +8 -10
- transformers/utils/__init__.py +4 -0
- transformers/utils/attention_visualizer.py +4 -4
- transformers/utils/auto_docstring.py +34 -25
- transformers/utils/generic.py +20 -0
- transformers/utils/import_utils.py +51 -9
- transformers/utils/kernel_config.py +71 -18
- transformers/utils/quantization_config.py +8 -8
- transformers/video_processing_utils.py +16 -12
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/METADATA +5 -6
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/RECORD +671 -632
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/WHEEL +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {transformers-5.0.0rc1.dist-info → transformers-5.0.0rc2.dist-info}/top_level.txt +0 -0
transformers/quantizers/quantizer_torchao.py
CHANGED

@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING
 from packaging import version

 from .base import HfQuantizer
-from .quantizers_utils import get_module_from_name
+from .quantizers_utils import get_module_from_name, should_convert_module


 if TYPE_CHECKING:
@@ -94,19 +94,19 @@ class TorchAoHfQuantizer(HfQuantizer):
     def __init__(self, quantization_config, **kwargs):
         super().__init__(quantization_config, **kwargs)

-        … (9 removed lines not shown)
+        self.quantized_param_size = None
+        quant_type = self.quantization_config.quant_type
+        if isinstance(quant_type, str):
+            map_to_param_size = {
+                "int4_weight_only": 0.5,
+                "int8_weight_only": 1,
+                "int8_dynamic_activation_int8_weight": 1,
+            }
+            if quant_type in map_to_param_size:
+                self.quantized_param_size = map_to_param_size[quant_type]
         else:
-            … (2 removed lines not shown)
-            self.full_ao_keys = self.weight_ao_keys + ["_data"]
+            size_digit = fuzzy_match_size(quant_type.__class__.__name__)
+            self.quantized_param_size = 0.5 if size_digit == "4" else 1

     def validate_environment(self, *args, **kwargs):
         if not is_torchao_available():
@@ -134,22 +134,11 @@ class TorchAoHfQuantizer(HfQuantizer):

     def update_dtype(self, dtype):
         if self.quantization_config.quant_type == "int4_weight_only":
-            if dtype …
+            if dtype != torch.bfloat16:
                 logger.warning_once(
-                    f"Setting dtype to {dtype} for int4_weight_only quantization, but only bfloat16 is supported right now. …
-                )
-            if dtype is None:
-                logger.warning_once(
-                    "Setting dtype to torch.bfloat16 for int4_weight_only quantization since only bfloat16 is supported right now. Please set dtype=torch.bfloat16 to remove this warning."
+                    f"Setting dtype to {dtype} for int4_weight_only quantization, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16."
                 )
             dtype = torch.bfloat16
-        if self.quantization_config.quant_type == "int8_dynamic_activation_int8_weight":
-            if dtype is None:
-                logger.info(
-                    "Setting dtype to torch.float32 for int8_dynamic_activation_int8_weight quantization as no dtype was specified in from_pretrained"
-                )
-                # we need to set the dtype, otherwise we have dtype mismatch when performing the quantized linear op
-                dtype = torch.float32
         return dtype

     def get_state_dict_and_metadata(self, model):
@@ -157,57 +146,27 @@ class TorchAoHfQuantizer(HfQuantizer):
         We flatten the state dict of tensor subclasses so that it is compatible with the safetensors format.
         """
         if TORCHAO_VERSION >= version.parse("0.15.0"):
-            return flatten_tensor_state_dict(model.state_dict())
+            return flatten_tensor_state_dict(model.state_dict())
         else:
             raise RuntimeError(
                 f"In order to use safetensors with torchao, please use torchao version >= 0.15.0. Current version: {TORCHAO_VERSION}"
             )

-    def …
-        … (6 removed lines not shown)
-            quant_type = self.quantization_config.quant_type
-            if isinstance(quant_type, AOBaseConfig):
-                # Extract size digit using fuzzy match on the class name
-                config_name = quant_type.__class__.__name__
-                size_digit = fuzzy_match_size(config_name)
-
-                # Map the extracted digit to appropriate dtype
-                if size_digit == "4":
-                    return CustomDtype.INT4
-                else:
-                    # Default to int8
-                    return torch.int8
-
-            # Original mapping for non-AOBaseConfig types
-            map_to_target_dtype = {
-                "int4_weight_only": CustomDtype.INT4,
-                "int8_weight_only": torch.int8,
-                "int8_dynamic_activation_int8_weight": torch.int8,
-                "autoquant": None,
-            }
-            return map_to_target_dtype[self.quantization_config.quant_type]
-        else:
-            raise ValueError(
-                "You are using `device_map='auto'` on a torchao quantized model. To automatically compute"
-                " the appropriate device map, you should upgrade your `accelerate` library with "
-                "`pip install --upgrade accelerate`"
-            )
+    def param_element_size(self, model: "PreTrainedModel", param_name: str, param: "torch.Tensor") -> float:
+        "Return the element size (in bytes) for `param_name`."
+        if self.param_needs_quantization(model, param_name) and self.quantized_param_size is not None:
+            return self.quantized_param_size
+
+        return super().param_element_size(model, param_name, param)

     def adjust_max_memory(self, max_memory: dict[str, int | str]) -> dict[str, int | str]:
         # need more space for the quantization parameters (e.g. scale). Tested with int4 wo and group size = 128
         max_memory = {key: val * 0.9 for key, val in max_memory.items()}
         return max_memory

-    def _process_model_before_weight_loading(
-        self, model: "PreTrainedModel", keep_in_fp32_modules: list[str] | None = None, **kwargs
-    ):
+    def _process_model_before_weight_loading(self, model: "PreTrainedModel", checkpoint_files=None, **kwargs):
         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, …
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
         )
         if self.quantization_config.include_input_output_embeddings:
             input_emb = model.get_input_embeddings()
@@ -217,16 +176,16 @@ class TorchAoHfQuantizer(HfQuantizer):
             self.modules_to_not_convert = [
                 x for x in self.modules_to_not_convert if x not in input_emb_names + output_emb_names
             ]
-        … (1 removed line not shown)
+        if checkpoint_files is not None:
+            # Torchao needs access to all metadata later
+            self.set_metadata(checkpoint_files)

     def param_needs_quantization(self, model: "PreTrainedModel", param_name: str, **kwargs) -> bool:
-        if self.pre_quantized:
-            return False
         if self.quantization_config.quant_type == "autoquant":
             return False

         # check if the param_name is not in self.modules_to_not_convert
-        if …
+        if not should_convert_module(param_name, self.modules_to_not_convert):
             return False

         # we only quantize the weight of nn.Linear and nn.Embedding
@@ -253,22 +212,6 @@ class TorchAoHfQuantizer(HfQuantizer):

         return isinstance(module, tuple(_QUANTIZABLE)) and tensor_name == "weight"

-    def preprocess_model(self, model: "PreTrainedModel", config, dtype=None, checkpoint_files=None, **kwargs):
-        """
-        Setting model attributes and/or converting model before weights loading. At this point
-        the model should be initialized on the meta device so you can freely manipulate the skeleton
-        of the model in order to replace modules in-place. Make sure to override the abstract method `_process_model_before_weight_loading`.
-
-        Args:
-            model (`~transformers.PreTrainedModel`):
-                The model to quantize
-            kwargs (`dict`, *optional*):
-                The keyword arguments that are passed along `_process_model_before_weight_loading`.
-        """
-        super().preprocess_model(model, config, dtype, checkpoint_files, **kwargs)
-        # Torchao needs access to all metadata later
-        self.set_metadata(checkpoint_files)
-
     def _process_model_after_weight_loading(self, model, **kwargs):
         """No process required for torchao quantized model"""
         if self.quantization_config.quant_type == "autoquant":
@@ -294,45 +237,6 @@ class TorchAoHfQuantizer(HfQuantizer):
         )
         return _is_torchao_serializable

-    def get_accelerator_warm_up_factor(self):
-        """
-        This factor is used in caching_allocator_warmup to determine how many bytes to pre-allocate for accelerator warmup.
-        - A factor of 2 means we pre-allocate the full memory footprint of the model.
-        - A factor of 4 means we pre-allocate half of that, and so on
-
-        However, when using TorchAO, calculating memory usage with param.numel() * param.element_size() doesn't give the correct size for quantized weights (like int4 or int8)
-        That's because TorchAO internally represents quantized tensors using subtensors and metadata, and the reported element_size() still corresponds to the dtype
-        not the actual bit-width of the quantized data.
-
-        To correct for this:
-        - Use a division factor of 8 for int4 weights
-        - Use a division factor of 4 for int8 weights
-        """
-        if self.quantization_config._get_ao_version() > version.Version("0.9.0"):
-            from torchao.core.config import AOBaseConfig
-
-            quant_type = self.quantization_config.quant_type
-            # For autoquant case, it will be treated in the string implementation below in map_to_target_dtype
-            if isinstance(quant_type, AOBaseConfig):
-                # Extract size digit using fuzzy match on the class name
-                config_name = quant_type.__class__.__name__
-                size_digit = fuzzy_match_size(config_name)
-
-                if size_digit == "4":
-                    return 8
-                else:
-                    return 4
-
-        # Original mapping for non-AOBaseConfig types
-        map_to_target_dtype = {
-            "int4_weight_only": 8,
-            "int8_weight_only": 4,
-            "int8_dynamic_activation_int8_weight": 4,
-            "autoquant": 4,
-        }
-
-        return map_to_target_dtype[self.quantization_config.quant_type]
-
     @property
     def is_trainable(self) -> bool:
         supported_quant_types_for_training = [
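The removed warm-up-factor and target-dtype heuristics are replaced by the single `quantized_param_size` / `param_element_size` path above (0.5 bytes per element for int4 quant types, 1 for int8). As a rough, hedged illustration of the code path this feeds, loading a torchao-quantized model looks roughly like the sketch below; the checkpoint id and kwargs are placeholders, and torchao plus a CUDA-capable device are assumed.

# Illustrative sketch only; not taken from this diff.
import torch
from transformers import AutoModelForCausalLM, TorchAoConfig

# A string quant type maps to 0.5 (int4) or 1 (int8) byte per element in the
# new quantized_param_size bookkeeping shown above.
quantization_config = TorchAoConfig("int8_weight_only")

model = AutoModelForCausalLM.from_pretrained(
    "your-org/your-model",     # placeholder checkpoint
    dtype=torch.bfloat16,      # int4_weight_only would force bfloat16 anyway (see update_dtype above)
    device_map="auto",
    quantization_config=quantization_config,
)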
transformers/quantizers/quantizer_vptq.py
CHANGED

@@ -49,24 +49,15 @@ class VptqHfQuantizer(HfQuantizer):
         if not torch.cuda.is_available():
             raise RuntimeError("GPU is required to run VTPQ quantized model.")

-    def update_dtype(self, dtype: "torch.dtype") -> "torch.dtype":
-        if dtype is None:
-            dtype = torch.float16
-            logger.info(
-                "Assuming VPTQ inference on GPU and loading the model in `torch.float16`. To overwrite it, set `dtype` manually."
-            )
-        return dtype
-
     def _process_model_before_weight_loading(
         self,
         model: "PreTrainedModel",
-        keep_in_fp32_modules: list[str] | None = None,
         **kwargs,
     ):
         from ..integrations import replace_with_vptq_linear

         self.modules_to_not_convert = self.get_modules_to_not_convert(
-            model, self.quantization_config.modules_to_not_convert, …
+            model, self.quantization_config.modules_to_not_convert, model._keep_in_fp32_modules
         )
         replace_with_vptq_linear(
             model,
transformers/testing_utils.py
CHANGED
@@ -118,6 +118,7 @@ from .utils import (
     is_mistral_common_available,
     is_natten_available,
     is_nltk_available,
+    is_numba_available,
     is_onnx_available,
     is_openai_available,
     is_optimum_available,
@@ -130,6 +131,7 @@ from .utils import (
     is_pyctcdecode_available,
     is_pytesseract_available,
     is_pytest_available,
+    is_pytest_order_available,
     is_pytorch_quantization_available,
     is_quark_available,
     is_qutlass_available,
@@ -221,7 +223,7 @@ if is_torch_available():
     import torch
     from safetensors.torch import load_file

-    from .modeling_utils import PreTrainedModel
+    from .modeling_utils import FLASH_ATTN_KERNEL_FALLBACK, PreTrainedModel

     IS_ROCM_SYSTEM = torch.version.hip is not None
     IS_CUDA_SYSTEM = torch.version.cuda is not None
@@ -620,7 +622,7 @@ def require_flash_attn(test_case):
     try:
         from kernels import get_kernel

-        get_kernel("…
+        get_kernel(FLASH_ATTN_KERNEL_FALLBACK["flash_attention_2"])
     except Exception as _:
         kernels_available = False

@@ -1091,17 +1093,20 @@ def require_torch_large_gpu(test_case, memory: float = 20):
     )(test_case)


-def require_torch_large_accelerator(test_case, memory: float = 20):
+def require_torch_large_accelerator(test_case=None, *, memory: float = 20):
     """Decorator marking a test that requires an accelerator with more than `memory` GiB of memory."""
-    if torch_device != "cuda" and torch_device != "xpu":
-        return unittest.skip(reason=f"test requires a GPU or XPU with more than {memory} GiB of memory")(test_case)

-    … (1 removed line not shown)
+    def memory_decorator(tc):
+        if torch_device not in ("cuda", "xpu"):
+            return unittest.skip(f"test requires a GPU or XPU with more than {memory} GiB of memory")(tc)

-    … (4 removed lines not shown)
+        torch_accel = getattr(torch, torch_device)
+        return unittest.skipUnless(
+            torch_accel.get_device_properties(0).total_memory / 1024**3 > memory,
+            f"test requires a GPU or XPU with more than {memory} GiB of memory",
+        )(tc)
+
+    return memory_decorator if test_case is None else memory_decorator(test_case)


 def require_torch_accelerator(test_case):
@@ -1381,6 +1386,13 @@ def require_pyctcdecode(test_case):
     return unittest.skipUnless(is_pyctcdecode_available(), "test requires pyctcdecode")(test_case)


+def require_numba(test_case):
+    """
+    Decorator marking a test that requires numba
+    """
+    return unittest.skipUnless(is_numba_available(), "test requires numba")(test_case)
+
+
 def require_librosa(test_case):
     """
     Decorator marking a test that requires librosa
@@ -2659,9 +2671,13 @@ def run_first(test_case):
     single process at a time. So we make sure all tests that run in a subprocess are launched first, to avoid device
     allocation conflicts.
     """
-    … (1 removed line not shown)
+    # Without this check, we get unwanted warnings when it's not installed
+    if is_pytest_order_available():
+        import pytest

-    … (1 removed line not shown)
+        return pytest.mark.order(1)(test_case)
+    else:
+        return test_case


 def run_test_in_subprocess(test_case, target_func, inputs=None, timeout=None):
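The reworked `require_torch_large_accelerator` now supports both the bare and the parametrized form (its `memory` argument became keyword-only), and `require_numba` is new. A minimal sketch of how a test module might use them; the test names and bodies are placeholders.

# Sketch only: test bodies are placeholders.
from transformers.testing_utils import require_numba, require_torch_large_accelerator


@require_torch_large_accelerator  # bare form, default 20 GiB threshold
def test_fits_on_default_accelerator():
    ...


@require_torch_large_accelerator(memory=40)  # parametrized form, keyword-only memory
def test_needs_40_gib():
    ...


@require_numba
def test_uses_numba():
    ...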
@@ -1114,7 +1114,7 @@ class MistralCommonBackend(PushToHubMixin):
         max_length = self.model_max_length

         # Test if we have a padding token
-        if padding_strategy != PaddingStrategy.DO_NOT_PAD and (self.
+        if padding_strategy != PaddingStrategy.DO_NOT_PAD and (self.pad_token_id is None or self.pad_token_id < 0):
             raise ValueError(
                 "Asking to pad but the tokenizer does not have a padding token. "
                 "Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` "
@@ -1851,8 +1851,9 @@ class MistralCommonBackend(PushToHubMixin):
             raise ValueError("`init_inputs` are not supported by `MistralCommonBackend.from_pretrained`.")

         # Handle kwargs and AutoTokenizer/AutoProcessor case
+        # These kwargs are passed by AutoTokenizer/AutoProcessor but are not used by MistralCommonBackend
         if kwargs and not set(kwargs.keys()).issubset(
-            {"trust_remote_code", "_from_pipeline", "_commit_hash", "dtype", "_from_auto"}
+            {"trust_remote_code", "_from_pipeline", "_commit_hash", "dtype", "_from_auto", "subfolder"}
         ):
             raise ValueError(f"Some kwargs in {kwargs} are not supported by `MistralCommonBackend.from_pretrained`.")

@@ -972,7 +972,7 @@ class PreTrainedTokenizerBase(PushToHubMixin):

     # first name has to correspond to main model input name
     # to make sure `tokenizer.pad(...)` works correctly
-    model_input_names: list[str] = ["input_ids", "
+    model_input_names: list[str] = ["input_ids", "attention_mask"]
     padding_side: str = "right"
     truncation_side: str = "right"
     slow_tokenizer_class = None
@@ -2152,9 +2152,10 @@ class PreTrainedTokenizerBase(PushToHubMixin):
         # Add tokenizer class to the tokenizer config to be able to reload it with from_pretrained
         tokenizer_class = self.__class__.__name__

-        # tokenizers backend don't need to save added_tokens_decoder
+        # tokenizers backend don't need to save added_tokens_decoder and additional_special_tokens
         if any(base.__name__ == "TokenizersBackend" for base in self.__class__.__mro__):
             tokenizer_config.pop("added_tokens_decoder", None)
+            tokenizer_config.pop("additional_special_tokens", None)

         # Remove the Fast at the end if we can save the slow tokenizer
         if tokenizer_class.endswith("Fast") and getattr(self, "can_save_slow_tokenizer", False):
@@ -30,6 +30,7 @@ from tokenizers import AddedToken, processors
 from tokenizers import Encoding as EncodingFast
 from tokenizers import Tokenizer as TokenizerFast
 from tokenizers.decoders import Decoder as DecoderFast
+from tokenizers.models import BPE, Unigram
 from tokenizers.trainers import BpeTrainer, UnigramTrainer, WordLevelTrainer, WordPieceTrainer

 from .integrations.ggml import convert_gguf_tokenizer
@@ -121,7 +122,8 @@ class TokenizersBackend(PreTrainedTokenizerBase):
         if isinstance(vocab, list):
             vocab = list(map(tuple, vocab)) # TODO just for now
         elif cls.model.__name__ == "Unigram":
-            vocab
+            if vocab and isinstance(vocab[0], (list, tuple)):
+                vocab = [tuple(item) for item in vocab]
         elif cls.model.__name__ == "WordLevel":
             vocab = {token: i for i, token in enumerate(vocab)}
         elif cls.model.__name__ == "BPE" or cls.model.__name__ == "WordPiece":
@@ -182,6 +184,7 @@ class TokenizersBackend(PreTrainedTokenizerBase):
             local_kwargs["vocab"], local_kwargs["merges"] = TikTokenConverter(
                 vocab_file=vocab_file, extra_special_tokens=local_kwargs.get("extra_special_tokens")
             ).extract_vocab_merges_from_model(vocab_file)
+
             return local_kwargs

         # Fallback to standard vocab/merges files if they existed!
@@ -236,6 +239,9 @@ class TokenizersBackend(PreTrainedTokenizerBase):
         add_prefix_space = kwargs.get("add_prefix_space", False)
         vocab_file = kwargs.get("vocab_file")

+        vocab = kwargs.get("vocab")
+        merges = kwargs.get("merges")
+
         fast_tokenizer = None
         if tokenizer_object is not None:
             fast_tokenizer = copy.deepcopy(tokenizer_object)
@@ -252,6 +258,15 @@ class TokenizersBackend(PreTrainedTokenizerBase):
             kwargs.update(tokenizer_config)
             if len(additional_kwargs) > 0:
                 kwargs.update(additional_kwargs)
+        elif self._tokenizer is None and vocab is not None:
+            # Build from vocab/merges extracted by convert_to_native_format
+            if merges is not None:
+                vocab_dict = vocab if isinstance(vocab, dict) else {w: i for i, (w, _) in enumerate(vocab)}
+                fast_tokenizer = TokenizerFast(BPE(vocab=vocab_dict, merges=merges, fuse_unk=True, dropout=None))
+            elif isinstance(vocab, dict):
+                fast_tokenizer = TokenizerFast(BPE(vocab=vocab, merges=[], fuse_unk=True, dropout=None))
+            elif isinstance(vocab, list) and vocab and isinstance(vocab[0], (tuple, list)):
+                fast_tokenizer = TokenizerFast(Unigram(vocab=vocab, unk_id=kwargs.get("unk_id", 0)))
         elif self._tokenizer is None:
             raise ValueError(
                 "Couldn't instantiate the backend tokenizer from one of: \n"
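The new `elif` branch builds the backend straight from raw `vocab`/`merges` kwargs. A standalone sketch of the same construction using the `tokenizers` library directly, with toy data (the vocab and merges below are made up):

```python
from tokenizers import Tokenizer
from tokenizers.models import BPE, Unigram

# Toy BPE vocab/merges, analogous to what the fallback above receives.
bpe_vocab = {"l": 0, "o": 1, "w": 2, "lo": 3, "low": 4}
bpe_merges = [("l", "o"), ("lo", "w")]
bpe_tokenizer = Tokenizer(BPE(vocab=bpe_vocab, merges=bpe_merges, fuse_unk=True, dropout=None))

# Unigram takes (token, score) pairs plus the index of the unknown token.
unigram_vocab = [("<unk>", 0.0), ("low", -1.0), ("lower", -2.0)]
unigram_tokenizer = Tokenizer(Unigram(vocab=unigram_vocab, unk_id=0))
```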
@@ -260,6 +275,11 @@ class TokenizersBackend(PreTrainedTokenizerBase):
                 "(3) an equivalent slow tokenizer class to instantiate and convert. \n"
                 "You need to have sentencepiece or tiktoken installed to convert a slow tokenizer to a fast one."
             )
+        # Only set defaults when creating TokenizersBackend from scratch
+        if fast_tokenizer_file is None and tokenizer_object is None and self._tokenizer is None:
+            kwargs.setdefault("bos_token", "<s>")
+            kwargs.setdefault("eos_token", "</s>")
+
         if fast_tokenizer is not None:
             self._tokenizer = fast_tokenizer

@@ -289,6 +309,7 @@ class TokenizersBackend(PreTrainedTokenizerBase):
         # Set backend to "tokenizers" if not already set
         if "backend" not in kwargs:
             kwargs["backend"] = "tokenizers"
+
         explicit_bos_eos_in_kwargs = "add_bos_token" in kwargs or "add_eos_token" in kwargs
         self._add_bos_token = kwargs.get("add_bos_token", False)
         self._add_eos_token = kwargs.get("add_eos_token", False)
@@ -339,7 +360,7 @@ class TokenizersBackend(PreTrainedTokenizerBase):
                 tokens.append(token)
             if tokens:
                 # These tokens are from the special tokens map
-                self.add_tokens(tokens
+                self.add_tokens(tokens)

         try:
             vocab_size = self._tokenizer.get_vocab_size()
@@ -900,6 +921,8 @@ class TokenizersBackend(PreTrainedTokenizerBase):

         if isinstance(token_ids, int):
             token_ids = [token_ids]
+        if isinstance(token_ids, dict):
+            token_ids = token_ids["input_ids"]
         return self._tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)

     def _save_pretrained(
transformers/trainer.py
CHANGED
@@ -1671,6 +1671,12 @@ class Trainer:
                 optimizer_cls = AdamW8bit
             else:
                 raise ValueError("Invalid optimizer")
+            optimizer_kwargs.update(
+                {
+                    "block_size": optim_args.get("block_size", 256),
+                    "bf16_stochastic_round": strtobool(optim_args.get("bf16_stochastic_round", "False")),
+                }
+            )
             optimizer_kwargs.update(adam_kwargs)
         elif args.optim in [
             OptimizerNames.SCHEDULE_FREE_RADAM,
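The two extra kwargs are read from the free-form `optim_args` string on `TrainingArguments`. A hedged sketch of how a user would exercise them, assuming `adamw_torch_8bit` is the optimizer name handled by the `AdamW8bit` branch above:

```python
from transformers import TrainingArguments

# Sketch only: the optim value is an assumption about which OptimizerNames entry
# maps to AdamW8bit; block_size / bf16_stochastic_round are parsed from optim_args.
args = TrainingArguments(
    output_dir="out",
    optim="adamw_torch_8bit",
    optim_args="block_size=128,bf16_stochastic_round=True",
)
```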
@@ -2349,7 +2355,8 @@
         if self.is_fsdp_enabled:
             self.model = self.model_wrapped = model
             # Fix `got mixed torch.Tensor and DTensor` error in model.generate() for FSDP2 with LoRA
-
+            if hasattr(self.model, "generate"):
+                dist.fsdp.register_fsdp_forward_method(self.model, "generate")

         # for the rest of this function `model` is the outside model, whether it was wrapped or not
         if model is not self.model:
@@ -3943,6 +3950,9 @@
             # Both standard transformer models and Liger-patched models handle shift_labels correctly,
             # so we can directly use the computed loss from the model output.
             # See: https://huggingface.co/docs/accelerate/en/concept_guides/sequence_parallelism
+            if "labels" not in inputs and "shift_labels" in inputs:
+                # DeepSpeed SP Dataloader removes "labels" but we need it, otherwise, we won't compute the loss.
+                inputs["labels"] = inputs["shift_labels"]
         outputs = model(**inputs)
         loss = outputs.loss

@@ -4018,7 +4028,16 @@
             self._save(output_dir, state_dict=state_dict)
         elif self.is_deepspeed_enabled:
             try:
-
+                accept_exclude_frozen_parameters = "exclude_frozen_parameters" in set(
+                    inspect.signature(self.model_wrapped.save_checkpoint).parameters.keys()
+                )
+                zero3_sharding = self.deepspeed.config.get("zero_optimization", {}).get("stage", None) == 3
+                if accept_exclude_frozen_parameters and _is_peft_model(self.model) and zero3_sharding:
+                    # When using PEFT with DeepSpeed ZeRO Stage 3,
+                    # we do not need to load the frozen parameters
+                    state_dict = self.deepspeed._zero3_consolidated_16bit_state_dict(exclude_frozen_parameters=True)
+                else:
+                    state_dict = self.accelerator.get_state_dict(self.deepspeed)
                 if self.args.should_save:
                     self._save(output_dir, state_dict=state_dict)
             except ValueError:
@@ -4824,6 +4843,7 @@
         if not self.args.hub_always_push and self.push_in_progress is not None and not self.push_in_progress.is_done():
             return

+        self.callback_handler.on_push_begin(self.args, self.state, self.control)
         output_dir = self.args.output_dir
         # To avoid a new synchronization of all model weights, we just copy the file from the checkpoint folder
         modeling_files = [CONFIG_NAME, GENERATION_CONFIG_NAME, WEIGHTS_NAME, SAFE_WEIGHTS_NAME]
@@ -4918,6 +4938,8 @@
         The URL of the repository where the model was pushed if `blocking=False`, or a `Future` object tracking the
         progress of the commit if `blocking=True`.
         """
+        self.callback_handler.on_push_begin(self.args, self.state, self.control)
+
         model_name = kwargs.pop("model_name", None)
         if model_name is None and self.args.should_save:
             if self.args.hub_model_id is None:
transformers/trainer_callback.py
CHANGED
@@ -420,6 +420,11 @@ class TrainerCallback:
         Event called after a prediction step.
         """

+    def on_push_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
+        """
+        Event called before pushing the model to the hub, at the beginning of Trainer.push_to_hub and Trainer._push_from_checkpoint.
+        """
+

 class CallbackHandler(TrainerCallback):
     """Internal class that just calls the list of callbacks in order."""
@@ -532,6 +537,9 @@ class CallbackHandler(TrainerCallback):
     def on_prediction_step(self, args: TrainingArguments, state: TrainerState, control: TrainerControl):
         return self.call_event("on_prediction_step", args, state, control)

+    def on_push_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs):
+        return self.call_event("on_push_begin", args, state, control, **kwargs)
+
     def call_event(self, event, args, state, control, **kwargs):
         for callback in self.callbacks:
             result = getattr(callback, event)(
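Together with the `Trainer` call sites above, callbacks can now react when a Hub push is about to start. A minimal custom callback (the class name and log message are illustrative):

```python
from transformers import TrainerCallback


class AnnouncePushCallback(TrainerCallback):
    """Illustrative callback hooking the new on_push_begin event."""

    def on_push_begin(self, args, state, control, **kwargs):
        # Runs at the start of Trainer.push_to_hub and Trainer._push_from_checkpoint
        print(f"Starting Hub push at global step {state.global_step}")


# trainer.add_callback(AnnouncePushCallback())  # attach to an existing Trainer instance
```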
transformers/trainer_seq2seq.py
CHANGED
@@ -333,7 +333,11 @@ class Seq2SeqTrainer(Trainer):
             self.model.generation_config._from_model_config = False

         # Retrieves GenerationConfig from model.generation_config
+        # Update with defaults because earlier the generation config used ot be init
+        # with default values. Now we init it with `None` and keep defaults for BC
         gen_config = self.model.generation_config
+        default_gen_config = gen_config._get_default_generation_params()
+        gen_config.update(**default_gen_config, defaults_only=True)
         # in case the batch is shorter than max length, the output should be padded
         if generated_tokens.shape[-1] < gen_config.max_length:
             generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_config.max_length)
transformers/training_args.py
CHANGED
@@ -1530,16 +1530,14 @@ class TrainingArguments:
             self.greater_is_better = not self.metric_for_best_model.endswith("loss")
         if is_torch_available():
             if self.bf16 or self.bf16_full_eval:
-                if
-
-
-
-                if
-                    error_message
-
-
-                    # gpu
-                    raise ValueError(error_message)
+                if (
+                    not self.use_cpu and not is_torch_bf16_gpu_available() and not is_torch_xla_available()
+                ): # added for tpu support
+                    error_message = "Your setup doesn't support bf16/gpu. You need to assign use_cpu if you want to train the model on CPU"
+                    if is_torch_cuda_available():
+                        error_message += " You need Ampere+ GPU with cuda>=11.0"
+                    # gpu
+                    raise ValueError(error_message)

             if self.fp16 and self.bf16:
                 raise ValueError("At most one of fp16 and bf16 can be True, but not both")
transformers/utils/__init__.py
CHANGED
@@ -49,6 +49,7 @@ from .generic import (
     PaddingStrategy,
     TensorType,
     TransformersKwargs,
+    _is_tensor_or_array_like,
     can_return_loss,
     can_return_tuple,
     expand_dims,
@@ -144,6 +145,7 @@ from .import_utils import (
     is_gguf_available,
     is_gptqmodel_available,
     is_grokadamw_available,
+    is_grouped_mm_available,
     is_habana_gaudi1,
     is_hadamard_available,
     is_hqq_available,
@@ -168,6 +170,7 @@ from .import_utils import (
     is_ninja_available,
     is_nltk_available,
     is_num2words_available,
+    is_numba_available,
     is_onnx_available,
     is_openai_available,
     is_optimum_available,
@@ -182,6 +185,7 @@ from .import_utils import (
     is_pyctcdecode_available,
     is_pytesseract_available,
     is_pytest_available,
+    is_pytest_order_available,
     is_pytorch_quantization_available,
     is_quanto_greater,
     is_quark_available,
@@ -21,7 +21,7 @@ from ..models.auto.auto_factory import _get_model_class
 from ..models.auto.configuration_auto import AutoConfig
 from ..models.auto.modeling_auto import MODEL_FOR_PRETRAINING_MAPPING, MODEL_MAPPING
 from ..models.auto.processing_auto import PROCESSOR_MAPPING_NAMES, AutoProcessor
-from ..models.auto.tokenization_auto import
+from ..models.auto.tokenization_auto import AutoTokenizer
 from .import_utils import is_torch_available


@@ -199,12 +199,12 @@ class AttentionMaskVisualizer:
             if "token_type_ids" in inputs: # TODO inspect signature of update causal mask
                 kwargs["token_type_ids"] = inputs["token_type_ids"]
             tokens = processor.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
-
+        else:
             tokenizer = AutoTokenizer.from_pretrained(self.repo_id)
             tokens = tokenizer.tokenize(input_sentence)
             attention_mask = tokenizer(input_sentence, return_tensors="pt")["attention_mask"]
-
-
+        if attention_mask is None:
+            raise ValueError(f"Model type {self.config.model_type} does not support attention visualization")

         model.config._attn_implementation = "eager"
         model.train()
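For context, the visualizer touched by this fix is typically driven as below; unsupported checkpoints now fail with the explicit `ValueError` added above rather than an opaque error. The repo id is only an example:

```python
from transformers.utils.attention_visualizer import AttentionMaskVisualizer

visualizer = AttentionMaskVisualizer("meta-llama/Llama-3.2-1B")  # example checkpoint
visualizer("The quick brown fox jumps over the lazy dog")
```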