transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Tokenization classes for XGLM."""
|
|
16
16
|
|
|
17
|
-
from typing import Optional
|
|
17
|
+
from typing import Optional, Union
|
|
18
18
|
|
|
19
19
|
from tokenizers import Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
|
|
20
20
|
from tokenizers.models import Unigram
|
|
@@ -50,7 +50,7 @@ class XGLMTokenizer(TokenizersBackend):
|
|
|
50
50
|
The unknown token.
|
|
51
51
|
pad_token (`str`, *optional*, defaults to `"<pad>"`):
|
|
52
52
|
The token used for padding.
|
|
53
|
-
vocab (`dict`, *optional*):
|
|
53
|
+
vocab (`str`, `dict` or `list`, *optional*):
|
|
54
54
|
Custom vocabulary dictionary. If not provided, a minimal vocabulary is created.
|
|
55
55
|
merges (`list[tuple[str, str]]`, *optional*):
|
|
56
56
|
Custom merge rules for BPE. If not provided, merges are generated from the vocabulary.
|
|
@@ -60,18 +60,17 @@ class XGLMTokenizer(TokenizersBackend):
|
|
|
60
60
|
|
|
61
61
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
62
62
|
model_input_names = ["input_ids", "attention_mask"]
|
|
63
|
-
|
|
63
|
+
model = Unigram
|
|
64
64
|
|
|
65
65
|
def __init__(
|
|
66
66
|
self,
|
|
67
|
+
vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
|
|
67
68
|
bos_token: str = "<s>",
|
|
68
69
|
eos_token: str = "</s>",
|
|
69
70
|
sep_token: str = "</s>",
|
|
70
71
|
cls_token: str = "<s>",
|
|
71
72
|
unk_token: str = "<unk>",
|
|
72
73
|
pad_token: str = "<pad>",
|
|
73
|
-
vocab: Optional[dict] = None,
|
|
74
|
-
merges: Optional[list[tuple[str, str]]] = None,
|
|
75
74
|
add_prefix_space: bool = True,
|
|
76
75
|
**kwargs,
|
|
77
76
|
):
|
|
@@ -106,11 +105,7 @@ class XGLMTokenizer(TokenizersBackend):
|
|
|
106
105
|
prepend_scheme = "always" if add_prefix_space else "never"
|
|
107
106
|
self._tokenizer.pre_tokenizer = pre_tokenizers.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
|
|
108
107
|
self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
|
|
109
|
-
|
|
110
|
-
tokenizer_object = self._tokenizer
|
|
111
|
-
|
|
112
108
|
super().__init__(
|
|
113
|
-
tokenizer_object=tokenizer_object,
|
|
114
109
|
bos_token=bos_token,
|
|
115
110
|
eos_token=eos_token,
|
|
116
111
|
sep_token=sep_token,
|
|
@@ -1082,6 +1082,7 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
|
|
|
1082
1082
|
output_attentions: Optional[bool] = None,
|
|
1083
1083
|
output_hidden_states: Optional[bool] = None,
|
|
1084
1084
|
return_dict: Optional[bool] = None,
|
|
1085
|
+
**kwargs,
|
|
1085
1086
|
) -> Union[tuple, SequenceClassifierOutput]:
|
|
1086
1087
|
r"""
|
|
1087
1088
|
langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -1190,6 +1191,7 @@ class XLMForQuestionAnsweringSimple(XLMPreTrainedModel):
|
|
|
1190
1191
|
output_attentions: Optional[bool] = None,
|
|
1191
1192
|
output_hidden_states: Optional[bool] = None,
|
|
1192
1193
|
return_dict: Optional[bool] = None,
|
|
1194
|
+
**kwargs,
|
|
1193
1195
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
1194
1196
|
r"""
|
|
1195
1197
|
langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -1291,6 +1293,7 @@ class XLMForQuestionAnswering(XLMPreTrainedModel):
|
|
|
1291
1293
|
output_attentions: Optional[bool] = None,
|
|
1292
1294
|
output_hidden_states: Optional[bool] = None,
|
|
1293
1295
|
return_dict: Optional[bool] = None,
|
|
1296
|
+
**kwargs,
|
|
1294
1297
|
) -> Union[tuple, XLMForQuestionAnsweringOutput]:
|
|
1295
1298
|
r"""
|
|
1296
1299
|
langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -1406,6 +1409,7 @@ class XLMForTokenClassification(XLMPreTrainedModel):
|
|
|
1406
1409
|
output_attentions: Optional[bool] = None,
|
|
1407
1410
|
output_hidden_states: Optional[bool] = None,
|
|
1408
1411
|
return_dict: Optional[bool] = None,
|
|
1412
|
+
**kwargs,
|
|
1409
1413
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
1410
1414
|
r"""
|
|
1411
1415
|
langs (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -1491,6 +1495,7 @@ class XLMForMultipleChoice(XLMPreTrainedModel):
|
|
|
1491
1495
|
output_attentions: Optional[bool] = None,
|
|
1492
1496
|
output_hidden_states: Optional[bool] = None,
|
|
1493
1497
|
return_dict: Optional[bool] = None,
|
|
1498
|
+
**kwargs,
|
|
1494
1499
|
) -> Union[tuple, MultipleChoiceModelOutput]:
|
|
1495
1500
|
r"""
|
|
1496
1501
|
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
# limitations under the License
|
|
15
15
|
"""Tokenization classes for XLM-RoBERTa model (Tokenizers backend)."""
|
|
16
16
|
|
|
17
|
-
from typing import Optional
|
|
17
|
+
from typing import Optional, Union
|
|
18
18
|
|
|
19
19
|
from tokenizers import Tokenizer, decoders, normalizers, pre_tokenizers, processors
|
|
20
20
|
from tokenizers.models import Unigram
|
|
@@ -47,16 +47,17 @@ class XLMRobertaTokenizer(TokenizersBackend):
|
|
|
47
47
|
pad_token (`str`, optional, defaults to `"<pad>"`): The padding token.
|
|
48
48
|
mask_token (`str`, optional, defaults to `"<mask>"`): The mask token.
|
|
49
49
|
add_prefix_space (`bool`, optional, defaults to `True`): Whether to add an initial space.
|
|
50
|
-
vocab (`dict`, optional): Custom vocabulary dictionary.
|
|
51
|
-
merges (`list`, optional): Custom merges list.
|
|
50
|
+
vocab (`str`, `dict` or `list`, optional): Custom vocabulary dictionary.
|
|
52
51
|
"""
|
|
53
52
|
|
|
54
53
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
55
54
|
model_input_names = ["input_ids", "attention_mask"]
|
|
56
|
-
|
|
55
|
+
model = Unigram
|
|
57
56
|
|
|
58
57
|
def __init__(
|
|
59
58
|
self,
|
|
59
|
+
vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
|
|
60
|
+
add_prefix_space: bool = True,
|
|
60
61
|
bos_token: str = "<s>",
|
|
61
62
|
eos_token: str = "</s>",
|
|
62
63
|
sep_token: str = "</s>",
|
|
@@ -64,9 +65,6 @@ class XLMRobertaTokenizer(TokenizersBackend):
|
|
|
64
65
|
unk_token: str = "<unk>",
|
|
65
66
|
pad_token: str = "<pad>",
|
|
66
67
|
mask_token: str = "<mask>",
|
|
67
|
-
add_prefix_space: bool = True,
|
|
68
|
-
vocab: Optional[dict] = None,
|
|
69
|
-
vocab_file: Optional[str] = None,
|
|
70
68
|
**kwargs,
|
|
71
69
|
):
|
|
72
70
|
self.add_prefix_space = add_prefix_space
|
|
@@ -99,11 +97,7 @@ class XLMRobertaTokenizer(TokenizersBackend):
|
|
|
99
97
|
]
|
|
100
98
|
)
|
|
101
99
|
self._tokenizer.decoder = decoders.Metaspace(replacement="▁", prepend_scheme=prepend_scheme)
|
|
102
|
-
|
|
103
|
-
tokenizer_object = self._tokenizer
|
|
104
|
-
|
|
105
100
|
super().__init__(
|
|
106
|
-
tokenizer_object=tokenizer_object,
|
|
107
101
|
bos_token=bos_token,
|
|
108
102
|
eos_token=eos_token,
|
|
109
103
|
sep_token=sep_token,
|
|
@@ -116,14 +110,13 @@ class XLMRobertaTokenizer(TokenizersBackend):
|
|
|
116
110
|
)
|
|
117
111
|
|
|
118
112
|
self._tokenizer.post_processor = processors.TemplateProcessing(
|
|
119
|
-
single=["$A",
|
|
120
|
-
pair=["$A",
|
|
113
|
+
single=[str(bos_token), "$A", str(eos_token)],
|
|
114
|
+
pair=[str(bos_token), "$A", str(eos_token), "$B", str(eos_token)],
|
|
121
115
|
special_tokens=[
|
|
122
|
-
(
|
|
116
|
+
(str(bos_token), self.bos_token_id),
|
|
117
|
+
(str(eos_token), self.eos_token_id),
|
|
123
118
|
],
|
|
124
119
|
)
|
|
125
120
|
|
|
126
|
-
self.vocab_file = vocab_file
|
|
127
|
-
|
|
128
121
|
|
|
129
122
|
__all__ = ["XLMRobertaTokenizer"]
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
"""Tokenization classes for XLNet model."""
|
|
16
16
|
|
|
17
|
-
from typing import Optional
|
|
17
|
+
from typing import Optional, Union
|
|
18
18
|
|
|
19
19
|
from tokenizers import AddedToken, Regex, Tokenizer, decoders, normalizers, pre_tokenizers, processors
|
|
20
20
|
from tokenizers.models import Unigram
|
|
@@ -98,10 +98,11 @@ class XLNetTokenizer(TokenizersBackend):
|
|
|
98
98
|
|
|
99
99
|
vocab_files_names = VOCAB_FILES_NAMES
|
|
100
100
|
padding_side = "left"
|
|
101
|
+
model = Unigram
|
|
101
102
|
|
|
102
103
|
def __init__(
|
|
103
104
|
self,
|
|
104
|
-
vocab: Optional[list] = None,
|
|
105
|
+
vocab: Optional[Union[str, list[tuple[str, float]]]] = None,
|
|
105
106
|
unk_id: int = 0,
|
|
106
107
|
do_lower_case=False,
|
|
107
108
|
remove_space=True,
|
|
@@ -159,13 +160,8 @@ class XLNetTokenizer(TokenizersBackend):
|
|
|
159
160
|
self.do_lower_case = do_lower_case
|
|
160
161
|
self.remove_space = remove_space
|
|
161
162
|
self.keep_accents = keep_accents
|
|
162
|
-
|
|
163
163
|
mask_token = AddedToken(mask_token, lstrip=True, rstrip=False) if isinstance(mask_token, str) else mask_token
|
|
164
|
-
|
|
165
|
-
tokenizer_object = self._tokenizer
|
|
166
|
-
|
|
167
164
|
super().__init__(
|
|
168
|
-
tokenizer_object=tokenizer_object,
|
|
169
165
|
unk_id=unk_id,
|
|
170
166
|
do_lower_case=do_lower_case,
|
|
171
167
|
remove_space=remove_space,
|
|
@@ -642,6 +642,7 @@ class YosoModel(YosoPreTrainedModel):
|
|
|
642
642
|
output_attentions: Optional[bool] = None,
|
|
643
643
|
output_hidden_states: Optional[bool] = None,
|
|
644
644
|
return_dict: Optional[bool] = None,
|
|
645
|
+
**kwargs,
|
|
645
646
|
) -> Union[tuple, BaseModelOutputWithCrossAttentions]:
|
|
646
647
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
|
647
648
|
output_hidden_states = (
|
|
@@ -734,6 +735,7 @@ class YosoForMaskedLM(YosoPreTrainedModel):
|
|
|
734
735
|
output_attentions: Optional[bool] = None,
|
|
735
736
|
output_hidden_states: Optional[bool] = None,
|
|
736
737
|
return_dict: Optional[bool] = None,
|
|
738
|
+
**kwargs,
|
|
737
739
|
) -> Union[tuple, MaskedLMOutput]:
|
|
738
740
|
r"""
|
|
739
741
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -823,6 +825,7 @@ class YosoForSequenceClassification(YosoPreTrainedModel):
|
|
|
823
825
|
output_attentions: Optional[bool] = None,
|
|
824
826
|
output_hidden_states: Optional[bool] = None,
|
|
825
827
|
return_dict: Optional[bool] = None,
|
|
828
|
+
**kwargs,
|
|
826
829
|
) -> Union[tuple, SequenceClassifierOutput]:
|
|
827
830
|
r"""
|
|
828
831
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -904,6 +907,7 @@ class YosoForMultipleChoice(YosoPreTrainedModel):
|
|
|
904
907
|
output_attentions: Optional[bool] = None,
|
|
905
908
|
output_hidden_states: Optional[bool] = None,
|
|
906
909
|
return_dict: Optional[bool] = None,
|
|
910
|
+
**kwargs,
|
|
907
911
|
) -> Union[tuple, MultipleChoiceModelOutput]:
|
|
908
912
|
r"""
|
|
909
913
|
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
|
|
@@ -1009,6 +1013,7 @@ class YosoForTokenClassification(YosoPreTrainedModel):
|
|
|
1009
1013
|
output_attentions: Optional[bool] = None,
|
|
1010
1014
|
output_hidden_states: Optional[bool] = None,
|
|
1011
1015
|
return_dict: Optional[bool] = None,
|
|
1016
|
+
**kwargs,
|
|
1012
1017
|
) -> Union[tuple, TokenClassifierOutput]:
|
|
1013
1018
|
r"""
|
|
1014
1019
|
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
|
|
@@ -1085,6 +1090,7 @@ class YosoForQuestionAnswering(YosoPreTrainedModel):
|
|
|
1085
1090
|
output_attentions: Optional[bool] = None,
|
|
1086
1091
|
output_hidden_states: Optional[bool] = None,
|
|
1087
1092
|
return_dict: Optional[bool] = None,
|
|
1093
|
+
**kwargs,
|
|
1088
1094
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
1089
1095
|
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
|
1090
1096
|
|
|
@@ -870,6 +870,7 @@ class ZambaModel(ZambaPreTrainedModel):
|
|
|
870
870
|
output_hidden_states: Optional[bool] = None,
|
|
871
871
|
return_dict: Optional[bool] = None,
|
|
872
872
|
cache_position: Optional[torch.LongTensor] = None,
|
|
873
|
+
**kwargs,
|
|
873
874
|
) -> Union[tuple, BaseModelOutputWithPast]:
|
|
874
875
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
|
875
876
|
output_hidden_states = (
|
|
@@ -1192,6 +1193,7 @@ class ZambaForSequenceClassification(ZambaPreTrainedModel):
|
|
|
1192
1193
|
output_attentions: Optional[bool] = None,
|
|
1193
1194
|
output_hidden_states: Optional[bool] = None,
|
|
1194
1195
|
return_dict: Optional[bool] = None,
|
|
1196
|
+
**kwargs,
|
|
1195
1197
|
) -> Union[tuple, SequenceClassifierOutputWithPast]:
|
|
1196
1198
|
r"""
|
|
1197
1199
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -41,6 +41,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
|
|
|
41
41
|
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
42
42
|
from ...processing_utils import Unpack
|
|
43
43
|
from ...utils import auto_docstring, logging
|
|
44
|
+
from ...utils.generic import maybe_autocast
|
|
44
45
|
from ...utils.import_utils import is_causal_conv1d_available, is_mamba_ssm_available
|
|
45
46
|
from .configuration_zamba2 import Zamba2Config
|
|
46
47
|
|
|
@@ -263,7 +264,7 @@ class Zamba2RotaryEmbedding(nn.Module):
|
|
|
263
264
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
264
265
|
|
|
265
266
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
266
|
-
with
|
|
267
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
267
268
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
268
269
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
269
270
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -424,7 +425,6 @@ class Zamba2Attention(nn.Module):
|
|
|
424
425
|
attention_mask: Optional[torch.Tensor] = None,
|
|
425
426
|
past_key_values: Optional[Zamba2HybridDynamicCache] = None,
|
|
426
427
|
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
|
|
427
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
428
428
|
**kwargs: Unpack[FlashAttentionKwargs],
|
|
429
429
|
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
|
|
430
430
|
input_shape = hidden_states.shape[:-1]
|
|
@@ -1294,6 +1294,7 @@ class Zamba2Model(Zamba2PreTrainedModel):
|
|
|
1294
1294
|
output_hidden_states: Optional[bool] = None,
|
|
1295
1295
|
return_dict: Optional[bool] = None,
|
|
1296
1296
|
cache_position: Optional[torch.LongTensor] = None,
|
|
1297
|
+
**kwargs,
|
|
1297
1298
|
) -> Union[tuple, BaseModelOutputWithPast]:
|
|
1298
1299
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
|
1299
1300
|
output_hidden_states = (
|
|
@@ -1638,6 +1639,7 @@ class Zamba2ForSequenceClassification(Zamba2PreTrainedModel):
|
|
|
1638
1639
|
output_attentions: Optional[bool] = None,
|
|
1639
1640
|
output_hidden_states: Optional[bool] = None,
|
|
1640
1641
|
return_dict: Optional[bool] = None,
|
|
1642
|
+
**kwargs,
|
|
1641
1643
|
) -> Union[tuple, SequenceClassifierOutputWithPast]:
|
|
1642
1644
|
r"""
|
|
1643
1645
|
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
|
|
@@ -232,7 +232,6 @@ class Zamba2Attention(ZambaAttention):
|
|
|
232
232
|
attention_mask: Optional[torch.Tensor] = None,
|
|
233
233
|
past_key_values: Optional[Zamba2HybridDynamicCache] = None,
|
|
234
234
|
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None,
|
|
235
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
236
235
|
**kwargs: Unpack[FlashAttentionKwargs],
|
|
237
236
|
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
|
|
238
237
|
input_shape = hidden_states.shape[:-1]
|
|
@@ -993,6 +992,7 @@ class Zamba2Model(ZambaModel, Zamba2PreTrainedModel):
|
|
|
993
992
|
output_hidden_states: Optional[bool] = None,
|
|
994
993
|
return_dict: Optional[bool] = None,
|
|
995
994
|
cache_position: Optional[torch.LongTensor] = None,
|
|
995
|
+
**kwargs,
|
|
996
996
|
) -> Union[tuple, BaseModelOutputWithPast]:
|
|
997
997
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
|
998
998
|
output_hidden_states = (
|
|
@@ -1251,6 +1251,7 @@ class ZoeDepthForDepthEstimation(ZoeDepthPreTrainedModel):
|
|
|
1251
1251
|
output_attentions: Optional[bool] = None,
|
|
1252
1252
|
output_hidden_states: Optional[bool] = None,
|
|
1253
1253
|
return_dict: Optional[bool] = None,
|
|
1254
|
+
**kwargs,
|
|
1254
1255
|
) -> Union[tuple[torch.Tensor], DepthEstimatorOutput]:
|
|
1255
1256
|
r"""
|
|
1256
1257
|
labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
|
|
@@ -18,7 +18,7 @@ import warnings
|
|
|
18
18
|
from pathlib import Path
|
|
19
19
|
from typing import TYPE_CHECKING, Any, Optional, Union
|
|
20
20
|
|
|
21
|
-
from huggingface_hub import model_info
|
|
21
|
+
from huggingface_hub import is_offline_mode, model_info
|
|
22
22
|
|
|
23
23
|
from ..configuration_utils import PreTrainedConfig
|
|
24
24
|
from ..dynamic_module_utils import get_class_from_dynamic_module
|
|
@@ -38,7 +38,6 @@ from ..utils import (
|
|
|
38
38
|
extract_commit_hash,
|
|
39
39
|
find_adapter_config_file,
|
|
40
40
|
is_kenlm_available,
|
|
41
|
-
is_offline_mode,
|
|
42
41
|
is_peft_available,
|
|
43
42
|
is_pyctcdecode_available,
|
|
44
43
|
is_torch_available,
|
|
@@ -278,7 +277,7 @@ SUPPORTED_TASKS = {
|
|
|
278
277
|
"image-to-text": {
|
|
279
278
|
"impl": ImageToTextPipeline,
|
|
280
279
|
"pt": (AutoModelForImageTextToText,) if is_torch_available() else (),
|
|
281
|
-
"default": {"model": ("ydshieh/vit-gpt2-coco-en", "
|
|
280
|
+
"default": {"model": ("ydshieh/vit-gpt2-coco-en", "e460201")},
|
|
282
281
|
"type": "multimodal",
|
|
283
282
|
},
|
|
284
283
|
"image-text-to-text": {
|
transformers/pipelines/base.py
CHANGED
|
@@ -950,20 +950,13 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
|
|
|
950
950
|
pipe_information["output_modalities"] = self.model.output_modalities
|
|
951
951
|
return f"{self.__class__.__name__}: {pipe_information}"
|
|
952
952
|
|
|
953
|
-
def save_pretrained(
|
|
954
|
-
self,
|
|
955
|
-
save_directory: str | os.PathLike,
|
|
956
|
-
safe_serialization: bool = True,
|
|
957
|
-
**kwargs: Any,
|
|
958
|
-
):
|
|
953
|
+
def save_pretrained(self, save_directory: str | os.PathLike, **kwargs: Any):
|
|
959
954
|
"""
|
|
960
955
|
Save the pipeline's model and tokenizer.
|
|
961
956
|
|
|
962
957
|
Args:
|
|
963
958
|
save_directory (`str` or `os.PathLike`):
|
|
964
959
|
A path to the directory where to saved. It will be created if it doesn't exist.
|
|
965
|
-
safe_serialization (`str`):
|
|
966
|
-
Whether to save the model using `safetensors` or PyTorch serialization.
|
|
967
960
|
kwargs (`dict[str, Any]`, *optional*):
|
|
968
961
|
Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
|
|
969
962
|
"""
|
|
@@ -992,7 +985,6 @@ class Pipeline(_ScikitCompat, PushToHubMixin):
|
|
|
992
985
|
# Save the pipeline custom code
|
|
993
986
|
custom_object_save(self, save_directory)
|
|
994
987
|
|
|
995
|
-
kwargs["safe_serialization"] = safe_serialization
|
|
996
988
|
self.model.save_pretrained(save_directory, **kwargs)
|
|
997
989
|
|
|
998
990
|
if self.tokenizer is not None:
|
|
@@ -146,7 +146,9 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
|
|
|
146
146
|
|
|
147
147
|
def __init__(self, *args, **kwargs):
|
|
148
148
|
super().__init__(*args, **kwargs)
|
|
149
|
-
if self.tokenizer is not None and not
|
|
149
|
+
if self.tokenizer is not None and not (
|
|
150
|
+
self.tokenizer.__class__.__name__.endswith("Fast") or self.tokenizer.backend == "tokenizers"
|
|
151
|
+
):
|
|
150
152
|
raise ValueError(
|
|
151
153
|
"`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
|
|
152
154
|
f"(`{self.tokenizer.__class__.__name__}`) is provided."
|
|
@@ -486,7 +486,7 @@ class TextGenerationPipeline(Pipeline):
|
|
|
486
486
|
]
|
|
487
487
|
else:
|
|
488
488
|
# When we're not starting from a prefill, the output is a new assistant message
|
|
489
|
-
if self.tokenizer
|
|
489
|
+
if getattr(self.tokenizer, "response_schema", False):
|
|
490
490
|
assistant_message = self.tokenizer.parse_response(all_text)
|
|
491
491
|
else:
|
|
492
492
|
# If there's no schema, then we have to assume it's all content
|
transformers/processing_utils.py
CHANGED
|
@@ -28,7 +28,7 @@ from typing import Annotated, Any, Literal, Optional, TypedDict, TypeVar, Union
|
|
|
28
28
|
|
|
29
29
|
import numpy as np
|
|
30
30
|
import typing_extensions
|
|
31
|
-
from huggingface_hub import create_repo
|
|
31
|
+
from huggingface_hub import create_repo, is_offline_mode
|
|
32
32
|
from huggingface_hub.dataclasses import validate_typed_dict
|
|
33
33
|
from huggingface_hub.errors import EntryNotFoundError
|
|
34
34
|
|
|
@@ -54,7 +54,6 @@ from .utils import (
|
|
|
54
54
|
cached_file,
|
|
55
55
|
copy_func,
|
|
56
56
|
direct_transformers_import,
|
|
57
|
-
is_offline_mode,
|
|
58
57
|
is_torch_available,
|
|
59
58
|
list_repo_templates,
|
|
60
59
|
logging,
|
|
@@ -696,14 +695,10 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
696
695
|
# extra attributes to be kept
|
|
697
696
|
attrs_to_save += ["auto_map"]
|
|
698
697
|
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
if "protein_tokenizer" in output:
|
|
704
|
-
del output["protein_tokenizer"]
|
|
705
|
-
if "char_tokenizer" in output:
|
|
706
|
-
del output["char_tokenizer"]
|
|
698
|
+
for attribute in self.__class__.get_attributes():
|
|
699
|
+
if "tokenizer" in attribute and attribute in output:
|
|
700
|
+
del output[attribute]
|
|
701
|
+
|
|
707
702
|
if "chat_template" in output:
|
|
708
703
|
del output["chat_template"]
|
|
709
704
|
|
|
@@ -1465,7 +1460,24 @@ class ProcessorMixin(PushToHubMixin):
|
|
|
1465
1460
|
# get args from processor init signature
|
|
1466
1461
|
sub_processors = cls.get_attributes()
|
|
1467
1462
|
for sub_processor_type in sub_processors:
|
|
1468
|
-
if
|
|
1463
|
+
if "FuyuProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
|
|
1464
|
+
from .tokenization_utils_tokenizers import TokenizersBackend
|
|
1465
|
+
|
|
1466
|
+
tokenizer = TokenizersBackend.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
|
1467
|
+
if "token_type_ids" in tokenizer.model_input_names:
|
|
1468
|
+
tokenizer.model_input_names.remove("token_type_ids")
|
|
1469
|
+
args.append(tokenizer)
|
|
1470
|
+
elif "PixtralProcessor" in cls.__name__ and "tokenizer" in sub_processor_type:
|
|
1471
|
+
from tokenizers import pre_tokenizers
|
|
1472
|
+
|
|
1473
|
+
from .models.llama import LlamaTokenizer
|
|
1474
|
+
|
|
1475
|
+
tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
|
1476
|
+
tokenizer._tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
|
|
1477
|
+
[pre_tokenizers.ByteLevel(False), tokenizer._tokenizer.pre_tokenizer]
|
|
1478
|
+
)
|
|
1479
|
+
args.append(tokenizer)
|
|
1480
|
+
elif sub_processor_type in MODALITY_TO_AUTOPROCESSOR_MAPPING:
|
|
1469
1481
|
auto_processor_class = MODALITY_TO_AUTOPROCESSOR_MAPPING[sub_processor_type]
|
|
1470
1482
|
sub_processor = auto_processor_class.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
|
1471
1483
|
args.append(sub_processor)
|