transformers 5.0.0rc0__py3-none-any.whl → 5.0.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- transformers/__init__.py +30 -3
- transformers/cli/serve.py +47 -17
- transformers/conversion_mapping.py +15 -2
- transformers/convert_slow_tokenizer.py +225 -10
- transformers/core_model_loading.py +196 -135
- transformers/data/data_collator.py +12 -4
- transformers/dependency_versions_table.py +1 -2
- transformers/dynamic_module_utils.py +1 -2
- transformers/feature_extraction_utils.py +1 -2
- transformers/file_utils.py +0 -1
- transformers/generation/__init__.py +11 -1
- transformers/generation/configuration_utils.py +3 -2
- transformers/generation/continuous_batching/__init__.py +4 -0
- transformers/generation/continuous_batching/continuous_api.py +134 -79
- transformers/image_processing_base.py +1 -2
- transformers/integrations/__init__.py +4 -2
- transformers/integrations/accelerate.py +15 -3
- transformers/integrations/aqlm.py +38 -66
- transformers/integrations/awq.py +48 -514
- transformers/integrations/bitnet.py +45 -100
- transformers/integrations/bitsandbytes.py +79 -191
- transformers/integrations/deepspeed.py +1 -0
- transformers/integrations/eetq.py +84 -79
- transformers/integrations/fbgemm_fp8.py +191 -145
- transformers/integrations/finegrained_fp8.py +236 -193
- transformers/integrations/fp_quant.py +92 -0
- transformers/integrations/ggml.py +11 -1
- transformers/integrations/higgs.py +40 -62
- transformers/integrations/hub_kernels.py +42 -3
- transformers/integrations/integration_utils.py +10 -0
- transformers/integrations/mxfp4.py +25 -65
- transformers/integrations/peft.py +7 -29
- transformers/integrations/quanto.py +73 -55
- transformers/integrations/quark.py +55 -0
- transformers/integrations/spqr.py +44 -90
- transformers/integrations/torchao.py +32 -38
- transformers/integrations/vptq.py +42 -59
- transformers/modelcard.py +1 -2
- transformers/modeling_gguf_pytorch_utils.py +8 -0
- transformers/modeling_rope_utils.py +30 -6
- transformers/modeling_utils.py +116 -112
- transformers/models/__init__.py +3 -0
- transformers/models/afmoe/modeling_afmoe.py +4 -4
- transformers/models/albert/tokenization_albert.py +6 -12
- transformers/models/align/modeling_align.py +2 -0
- transformers/models/altclip/modeling_altclip.py +4 -0
- transformers/models/apertus/modeling_apertus.py +4 -4
- transformers/models/arcee/modeling_arcee.py +4 -4
- transformers/models/aria/modeling_aria.py +4 -4
- transformers/models/audioflamingo3/modeling_audioflamingo3.py +1 -0
- transformers/models/audioflamingo3/modular_audioflamingo3.py +1 -0
- transformers/models/auto/configuration_auto.py +11 -0
- transformers/models/auto/feature_extraction_auto.py +2 -0
- transformers/models/auto/image_processing_auto.py +1 -0
- transformers/models/auto/modeling_auto.py +6 -0
- transformers/models/auto/processing_auto.py +18 -10
- transformers/models/auto/tokenization_auto.py +74 -472
- transformers/models/autoformer/modeling_autoformer.py +4 -0
- transformers/models/bamba/modeling_bamba.py +4 -3
- transformers/models/bark/modeling_bark.py +2 -0
- transformers/models/bart/modeling_bart.py +7 -0
- transformers/models/barthez/tokenization_barthez.py +5 -10
- transformers/models/beit/modeling_beit.py +6 -1
- transformers/models/bert/tokenization_bert.py +8 -21
- transformers/models/big_bird/modeling_big_bird.py +6 -0
- transformers/models/big_bird/tokenization_big_bird.py +18 -42
- transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py +8 -2
- transformers/models/biogpt/modeling_biogpt.py +2 -0
- transformers/models/biogpt/modular_biogpt.py +2 -0
- transformers/models/bit/modeling_bit.py +11 -2
- transformers/models/bitnet/modeling_bitnet.py +4 -4
- transformers/models/blenderbot/modeling_blenderbot.py +5 -0
- transformers/models/blenderbot/tokenization_blenderbot.py +12 -16
- transformers/models/blenderbot_small/modeling_blenderbot_small.py +5 -0
- transformers/models/blip/modeling_blip_text.py +2 -0
- transformers/models/blip_2/modeling_blip_2.py +2 -1
- transformers/models/bloom/modeling_bloom.py +4 -0
- transformers/models/blt/modeling_blt.py +2 -2
- transformers/models/blt/modular_blt.py +2 -2
- transformers/models/bridgetower/modeling_bridgetower.py +5 -1
- transformers/models/bros/modeling_bros.py +4 -0
- transformers/models/camembert/tokenization_camembert.py +8 -12
- transformers/models/canine/modeling_canine.py +5 -0
- transformers/models/chameleon/modeling_chameleon.py +2 -1
- transformers/models/chinese_clip/modeling_chinese_clip.py +3 -0
- transformers/models/clap/modeling_clap.py +5 -0
- transformers/models/clip/tokenization_clip.py +22 -44
- transformers/models/clipseg/modeling_clipseg.py +5 -0
- transformers/models/clvp/modeling_clvp.py +5 -0
- transformers/models/clvp/tokenization_clvp.py +1 -63
- transformers/models/code_llama/tokenization_code_llama.py +20 -43
- transformers/models/codegen/tokenization_codegen.py +14 -43
- transformers/models/cohere/modeling_cohere.py +4 -3
- transformers/models/cohere/modular_cohere.py +2 -1
- transformers/models/cohere/tokenization_cohere.py +12 -42
- transformers/models/cohere2/modeling_cohere2.py +7 -6
- transformers/models/cohere2/modular_cohere2.py +5 -5
- transformers/models/cohere2_vision/image_processing_cohere2_vision_fast.py +4 -3
- transformers/models/cohere2_vision/modular_cohere2_vision.py +4 -3
- transformers/models/colqwen2/modeling_colqwen2.py +1 -0
- transformers/models/colqwen2/modular_colqwen2.py +1 -0
- transformers/models/conditional_detr/modeling_conditional_detr.py +5 -0
- transformers/models/convbert/modeling_convbert.py +6 -0
- transformers/models/convnext/modeling_convnext.py +2 -4
- transformers/models/convnextv2/modeling_convnextv2.py +2 -4
- transformers/models/csm/modeling_csm.py +4 -3
- transformers/models/ctrl/modeling_ctrl.py +1 -0
- transformers/models/cvt/modeling_cvt.py +2 -0
- transformers/models/cwm/modeling_cwm.py +4 -4
- transformers/models/d_fine/modeling_d_fine.py +2 -0
- transformers/models/d_fine/modular_d_fine.py +1 -0
- transformers/models/dab_detr/modeling_dab_detr.py +4 -0
- transformers/models/dac/modeling_dac.py +2 -2
- transformers/models/data2vec/modeling_data2vec_audio.py +5 -0
- transformers/models/data2vec/modeling_data2vec_vision.py +4 -1
- transformers/models/dbrx/modeling_dbrx.py +2 -2
- transformers/models/deberta/modeling_deberta.py +5 -0
- transformers/models/deberta/tokenization_deberta.py +11 -20
- transformers/models/deberta_v2/modeling_deberta_v2.py +6 -0
- transformers/models/deberta_v2/tokenization_deberta_v2.py +13 -28
- transformers/models/decision_transformer/modeling_decision_transformer.py +4 -1
- transformers/models/deepseek_v2/modeling_deepseek_v2.py +2 -3
- transformers/models/deepseek_v2/modular_deepseek_v2.py +2 -2
- transformers/models/deepseek_v3/modeling_deepseek_v3.py +3 -2
- transformers/models/deepseek_v3/modular_deepseek_v3.py +1 -0
- transformers/models/deformable_detr/modeling_deformable_detr.py +4 -0
- transformers/models/depth_anything/modeling_depth_anything.py +1 -0
- transformers/models/depth_pro/modeling_depth_pro.py +2 -0
- transformers/models/detr/modeling_detr.py +5 -0
- transformers/models/dia/modeling_dia.py +4 -3
- transformers/models/dia/modular_dia.py +0 -1
- transformers/models/diffllama/modeling_diffllama.py +2 -2
- transformers/models/dinat/modeling_dinat.py +3 -0
- transformers/models/dinov3_convnext/modeling_dinov3_convnext.py +1 -1
- transformers/models/dinov3_vit/modeling_dinov3_vit.py +2 -2
- transformers/models/dinov3_vit/modular_dinov3_vit.py +2 -2
- transformers/models/distilbert/tokenization_distilbert.py +13 -0
- transformers/models/doge/modeling_doge.py +2 -3
- transformers/models/doge/modular_doge.py +0 -1
- transformers/models/donut/modeling_donut_swin.py +2 -0
- transformers/models/dots1/modeling_dots1.py +10 -7
- transformers/models/dots1/modular_dots1.py +5 -3
- transformers/models/dpr/modeling_dpr.py +5 -0
- transformers/models/dpr/tokenization_dpr.py +12 -0
- transformers/models/edgetam/modeling_edgetam.py +1 -1
- transformers/models/edgetam_video/modeling_edgetam_video.py +1 -0
- transformers/models/edgetam_video/modular_edgetam_video.py +1 -0
- transformers/models/efficientloftr/modeling_efficientloftr.py +2 -2
- transformers/models/efficientnet/modeling_efficientnet.py +2 -0
- transformers/models/emu3/modeling_emu3.py +4 -4
- transformers/models/eomt/image_processing_eomt.py +13 -1
- transformers/models/eomt/image_processing_eomt_fast.py +14 -2
- transformers/models/ernie4_5/modeling_ernie4_5.py +4 -4
- transformers/models/ernie4_5/modular_ernie4_5.py +2 -1
- transformers/models/ernie4_5_moe/modeling_ernie4_5_moe.py +5 -5
- transformers/models/ernie4_5_moe/modular_ernie4_5_moe.py +2 -2
- transformers/models/esm/modeling_esmfold.py +5 -4
- transformers/models/evolla/modeling_evolla.py +4 -4
- transformers/models/exaone4/modeling_exaone4.py +2 -2
- transformers/models/exaone4/modular_exaone4.py +0 -1
- transformers/models/falcon/modeling_falcon.py +6 -1
- transformers/models/falcon_h1/modeling_falcon_h1.py +4 -3
- transformers/models/falcon_mamba/modeling_falcon_mamba.py +25 -35
- transformers/models/falcon_mamba/modular_falcon_mamba.py +12 -31
- transformers/{kernels/falcon_mamba → models/fast_vlm}/__init__.py +15 -3
- transformers/models/fast_vlm/configuration_fast_vlm.py +137 -0
- transformers/models/fast_vlm/modeling_fast_vlm.py +455 -0
- transformers/models/fast_vlm/modular_fast_vlm.py +273 -0
- transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py +8 -3
- transformers/models/flaubert/modeling_flaubert.py +7 -0
- transformers/models/flava/modeling_flava.py +6 -1
- transformers/models/flex_olmo/modeling_flex_olmo.py +4 -5
- transformers/models/florence2/modeling_florence2.py +2 -1
- transformers/models/florence2/modular_florence2.py +2 -1
- transformers/models/fnet/modeling_fnet.py +7 -0
- transformers/models/focalnet/modeling_focalnet.py +4 -0
- transformers/models/fsmt/modeling_fsmt.py +2 -0
- transformers/models/funnel/modeling_funnel.py +8 -0
- transformers/models/funnel/tokenization_funnel.py +17 -24
- transformers/models/fuyu/processing_fuyu.py +3 -3
- transformers/models/gemma/modeling_gemma.py +4 -4
- transformers/models/gemma/tokenization_gemma.py +10 -27
- transformers/models/gemma2/modeling_gemma2.py +4 -4
- transformers/models/gemma2/modular_gemma2.py +2 -1
- transformers/models/gemma3/modeling_gemma3.py +14 -84
- transformers/models/gemma3/modular_gemma3.py +12 -81
- transformers/models/gemma3n/modeling_gemma3n.py +18 -209
- transformers/models/gemma3n/modular_gemma3n.py +17 -59
- transformers/models/git/modeling_git.py +2 -0
- transformers/models/glm/modeling_glm.py +4 -4
- transformers/models/glm4/modeling_glm4.py +4 -4
- transformers/models/glm4_moe/modeling_glm4_moe.py +5 -3
- transformers/models/glm4v/configuration_glm4v.py +3 -1
- transformers/models/glm4v/modeling_glm4v.py +3 -3
- transformers/models/glm4v/modular_glm4v.py +6 -4
- transformers/models/glm4v_moe/configuration_glm4v_moe.py +3 -1
- transformers/models/glm4v_moe/modeling_glm4v_moe.py +6 -5
- transformers/models/glm4v_moe/modular_glm4v_moe.py +1 -1
- transformers/models/glpn/modeling_glpn.py +2 -0
- transformers/models/gpt2/modeling_gpt2.py +5 -1
- transformers/models/gpt2/tokenization_gpt2.py +16 -44
- transformers/models/gpt_bigcode/modeling_gpt_bigcode.py +1 -0
- transformers/models/gpt_neo/modeling_gpt_neo.py +4 -0
- transformers/models/gpt_neox/modeling_gpt_neox.py +5 -2
- transformers/models/gpt_neox/modular_gpt_neox.py +3 -0
- transformers/models/gpt_neox/tokenization_gpt_neox.py +10 -49
- transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py +3 -1
- transformers/models/gpt_oss/modeling_gpt_oss.py +5 -6
- transformers/models/gpt_oss/modular_gpt_oss.py +3 -5
- transformers/models/gptj/modeling_gptj.py +3 -0
- transformers/models/granite/modeling_granite.py +4 -4
- transformers/models/granitemoe/modeling_granitemoe.py +4 -6
- transformers/models/granitemoe/modular_granitemoe.py +0 -2
- transformers/models/granitemoehybrid/modeling_granitemoehybrid.py +4 -6
- transformers/models/granitemoeshared/modeling_granitemoeshared.py +4 -6
- transformers/models/grounding_dino/modeling_grounding_dino.py +4 -0
- transformers/models/groupvit/modeling_groupvit.py +3 -0
- transformers/models/helium/modeling_helium.py +4 -3
- transformers/models/herbert/tokenization_herbert.py +9 -25
- transformers/models/hgnet_v2/modeling_hgnet_v2.py +6 -1
- transformers/models/hgnet_v2/modular_hgnet_v2.py +6 -1
- transformers/models/hiera/modeling_hiera.py +4 -0
- transformers/models/hubert/modeling_hubert.py +3 -0
- transformers/models/hubert/modular_hubert.py +1 -0
- transformers/models/hunyuan_v1_dense/modeling_hunyuan_v1_dense.py +4 -4
- transformers/models/hunyuan_v1_moe/modeling_hunyuan_v1_moe.py +4 -4
- transformers/models/ibert/modeling_ibert.py +6 -0
- transformers/models/idefics/modeling_idefics.py +5 -21
- transformers/models/imagegpt/modeling_imagegpt.py +2 -1
- transformers/models/informer/modeling_informer.py +4 -0
- transformers/models/informer/modular_informer.py +1 -0
- transformers/models/internvl/modeling_internvl.py +2 -4
- transformers/models/internvl/modular_internvl.py +2 -4
- transformers/models/jamba/modeling_jamba.py +2 -2
- transformers/models/janus/modeling_janus.py +1 -0
- transformers/models/janus/modular_janus.py +1 -0
- transformers/models/jetmoe/modeling_jetmoe.py +2 -2
- transformers/models/kosmos2/modeling_kosmos2.py +1 -0
- transformers/models/kyutai_speech_to_text/modeling_kyutai_speech_to_text.py +3 -1
- transformers/models/lasr/__init__.py +29 -0
- transformers/models/lasr/configuration_lasr.py +244 -0
- transformers/models/lasr/feature_extraction_lasr.py +277 -0
- transformers/models/lasr/modeling_lasr.py +729 -0
- transformers/models/lasr/modular_lasr.py +569 -0
- transformers/models/lasr/processing_lasr.py +96 -0
- transformers/models/lasr/tokenization_lasr.py +186 -0
- transformers/models/layoutlm/modeling_layoutlm.py +5 -0
- transformers/models/layoutlmv2/modeling_layoutlmv2.py +4 -0
- transformers/models/layoutlmv2/tokenization_layoutlmv2.py +10 -53
- transformers/models/layoutlmv3/modeling_layoutlmv3.py +4 -0
- transformers/models/layoutlmv3/tokenization_layoutlmv3.py +12 -61
- transformers/models/layoutxlm/tokenization_layoutxlm.py +13 -38
- transformers/models/led/modeling_led.py +6 -0
- transformers/models/levit/modeling_levit.py +3 -0
- transformers/models/lfm2/modeling_lfm2.py +4 -5
- transformers/models/lfm2/modular_lfm2.py +0 -1
- transformers/models/lfm2_moe/modeling_lfm2_moe.py +4 -5
- transformers/models/lightglue/modeling_lightglue.py +3 -1
- transformers/models/lightglue/modular_lightglue.py +1 -0
- transformers/models/lilt/modeling_lilt.py +4 -0
- transformers/models/llama/modeling_llama.py +4 -4
- transformers/models/llama/tokenization_llama.py +15 -43
- transformers/models/llama4/modeling_llama4.py +3 -2
- transformers/models/longcat_flash/modeling_longcat_flash.py +4 -4
- transformers/models/longcat_flash/modular_longcat_flash.py +2 -2
- transformers/models/longformer/modeling_longformer.py +6 -0
- transformers/models/longt5/modeling_longt5.py +4 -0
- transformers/models/luke/modeling_luke.py +9 -0
- transformers/models/luke/tokenization_luke.py +11 -38
- transformers/models/lxmert/modeling_lxmert.py +2 -0
- transformers/models/m2m_100/modeling_m2m_100.py +4 -0
- transformers/models/mamba/modeling_mamba.py +14 -22
- transformers/models/marian/modeling_marian.py +5 -0
- transformers/models/markuplm/modeling_markuplm.py +4 -0
- transformers/models/markuplm/tokenization_markuplm.py +28 -61
- transformers/models/mask2former/modeling_mask2former.py +2 -0
- transformers/models/maskformer/modeling_maskformer.py +2 -0
- transformers/models/maskformer/modeling_maskformer_swin.py +2 -0
- transformers/models/mbart/modeling_mbart.py +7 -0
- transformers/models/mbart/tokenization_mbart.py +11 -52
- transformers/models/mbart50/tokenization_mbart50.py +7 -10
- transformers/models/megatron_bert/modeling_megatron_bert.py +7 -0
- transformers/models/mgp_str/modeling_mgp_str.py +2 -0
- transformers/models/mimi/modeling_mimi.py +3 -1
- transformers/models/minimax/modeling_minimax.py +4 -4
- transformers/models/ministral/modeling_ministral.py +4 -4
- transformers/models/ministral3/configuration_ministral3.py +1 -1
- transformers/models/ministral3/modeling_ministral3.py +4 -3
- transformers/models/mistral/modeling_mistral.py +4 -3
- transformers/models/mixtral/modeling_mixtral.py +4 -4
- transformers/models/mllama/modeling_mllama.py +2 -2
- transformers/models/mluke/tokenization_mluke.py +6 -6
- transformers/models/mm_grounding_dino/modeling_mm_grounding_dino.py +4 -0
- transformers/models/mobilenet_v1/modeling_mobilenet_v1.py +2 -0
- transformers/models/mobilenet_v2/modeling_mobilenet_v2.py +3 -0
- transformers/models/mobilevit/modeling_mobilevit.py +3 -0
- transformers/models/mobilevitv2/modeling_mobilevitv2.py +3 -0
- transformers/models/modernbert/modeling_modernbert.py +4 -1
- transformers/models/modernbert/modular_modernbert.py +2 -0
- transformers/models/modernbert_decoder/modeling_modernbert_decoder.py +8 -9
- transformers/models/modernbert_decoder/modular_modernbert_decoder.py +6 -7
- transformers/models/moonshine/modeling_moonshine.py +4 -2
- transformers/models/moshi/modeling_moshi.py +5 -2
- transformers/models/mpnet/modeling_mpnet.py +5 -0
- transformers/models/mpnet/tokenization_mpnet.py +5 -13
- transformers/models/mpt/modeling_mpt.py +2 -0
- transformers/models/mra/modeling_mra.py +6 -0
- transformers/models/mt5/modeling_mt5.py +7 -0
- transformers/models/musicgen/modeling_musicgen.py +2 -0
- transformers/models/musicgen_melody/modeling_musicgen_melody.py +3 -0
- transformers/models/mvp/modeling_mvp.py +7 -0
- transformers/models/nanochat/modeling_nanochat.py +4 -4
- transformers/models/nemotron/modeling_nemotron.py +4 -2
- transformers/models/nllb/tokenization_nllb.py +8 -22
- transformers/models/nougat/tokenization_nougat.py +11 -59
- transformers/models/nystromformer/modeling_nystromformer.py +6 -0
- transformers/models/olmo/modeling_olmo.py +4 -4
- transformers/models/olmo/modular_olmo.py +2 -2
- transformers/models/olmo2/modeling_olmo2.py +4 -5
- transformers/models/olmo2/modular_olmo2.py +0 -1
- transformers/models/olmo3/modeling_olmo3.py +4 -4
- transformers/models/olmoe/modeling_olmoe.py +4 -4
- transformers/models/omdet_turbo/modeling_omdet_turbo.py +2 -0
- transformers/models/oneformer/modeling_oneformer.py +4 -1
- transformers/models/openai/modeling_openai.py +3 -0
- transformers/models/openai/tokenization_openai.py +10 -46
- transformers/models/opt/modeling_opt.py +2 -0
- transformers/models/owlv2/modeling_owlv2.py +4 -0
- transformers/models/owlvit/modeling_owlvit.py +4 -0
- transformers/models/paddleocr_vl/__init__.py +32 -0
- transformers/models/paddleocr_vl/configuration_paddleocr_vl.py +336 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl.py +503 -0
- transformers/models/paddleocr_vl/image_processing_paddleocr_vl_fast.py +209 -0
- transformers/models/paddleocr_vl/modeling_paddleocr_vl.py +1668 -0
- transformers/models/paddleocr_vl/modular_paddleocr_vl.py +1349 -0
- transformers/models/paddleocr_vl/processing_paddleocr_vl.py +135 -0
- transformers/models/parakeet/configuration_parakeet.py +4 -6
- transformers/models/parakeet/modeling_parakeet.py +9 -6
- transformers/models/parakeet/modular_parakeet.py +2 -2
- transformers/models/parakeet/processing_parakeet.py +1 -0
- transformers/models/patchtsmixer/modeling_patchtsmixer.py +6 -0
- transformers/models/patchtst/modeling_patchtst.py +20 -2
- transformers/models/pegasus/modeling_pegasus.py +5 -0
- transformers/models/pegasus/tokenization_pegasus.py +17 -44
- transformers/models/pegasus_x/modeling_pegasus_x.py +4 -0
- transformers/models/perceiver/modeling_perceiver.py +8 -0
- transformers/models/persimmon/modeling_persimmon.py +2 -1
- transformers/models/phi/modeling_phi.py +4 -5
- transformers/models/phi/modular_phi.py +0 -1
- transformers/models/phi3/modeling_phi3.py +2 -1
- transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +5 -5
- transformers/models/phi4_multimodal/modular_phi4_multimodal.py +4 -4
- transformers/models/phimoe/modeling_phimoe.py +4 -4
- transformers/models/phimoe/modular_phimoe.py +2 -2
- transformers/models/pix2struct/modeling_pix2struct.py +2 -0
- transformers/models/pixtral/modeling_pixtral.py +2 -1
- transformers/models/plbart/modeling_plbart.py +6 -0
- transformers/models/plbart/modular_plbart.py +2 -0
- transformers/models/plbart/tokenization_plbart.py +0 -2
- transformers/models/poolformer/modeling_poolformer.py +2 -0
- transformers/models/pop2piano/modeling_pop2piano.py +2 -0
- transformers/models/prompt_depth_anything/modeling_prompt_depth_anything.py +1 -0
- transformers/models/prompt_depth_anything/modular_prompt_depth_anything.py +1 -0
- transformers/models/prophetnet/modeling_prophetnet.py +3 -0
- transformers/models/pvt/modeling_pvt.py +2 -0
- transformers/models/pvt_v2/modeling_pvt_v2.py +3 -0
- transformers/models/qwen2/modeling_qwen2.py +4 -4
- transformers/models/qwen2/tokenization_qwen2.py +14 -18
- transformers/models/qwen2_5_omni/configuration_qwen2_5_omni.py +4 -2
- transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py +13 -16
- transformers/models/qwen2_5_omni/modular_qwen2_5_omni.py +14 -16
- transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +1 -1
- transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +5 -6
- transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +3 -5
- transformers/models/qwen2_audio/modeling_qwen2_audio.py +2 -0
- transformers/models/qwen2_moe/modeling_qwen2_moe.py +4 -4
- transformers/models/qwen2_vl/configuration_qwen2_vl.py +1 -1
- transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -16
- transformers/models/qwen3/modeling_qwen3.py +4 -4
- transformers/models/qwen3_moe/modeling_qwen3_moe.py +4 -4
- transformers/models/qwen3_next/modeling_qwen3_next.py +4 -3
- transformers/models/qwen3_omni_moe/modeling_qwen3_omni_moe.py +21 -23
- transformers/models/qwen3_omni_moe/modular_qwen3_omni_moe.py +14 -16
- transformers/models/qwen3_vl/modeling_qwen3_vl.py +39 -37
- transformers/models/qwen3_vl/modular_qwen3_vl.py +37 -35
- transformers/models/qwen3_vl_moe/modeling_qwen3_vl_moe.py +39 -37
- transformers/models/qwen3_vl_moe/modular_qwen3_vl_moe.py +4 -1
- transformers/models/rag/modeling_rag.py +1 -0
- transformers/models/recurrent_gemma/modeling_recurrent_gemma.py +15 -1
- transformers/models/reformer/modeling_reformer.py +4 -0
- transformers/models/reformer/tokenization_reformer.py +11 -28
- transformers/models/regnet/modeling_regnet.py +6 -1
- transformers/models/rembert/modeling_rembert.py +6 -0
- transformers/models/rembert/tokenization_rembert.py +3 -10
- transformers/models/resnet/modeling_resnet.py +11 -2
- transformers/models/roberta/tokenization_roberta.py +18 -27
- transformers/models/roformer/modeling_roformer.py +6 -0
- transformers/models/roformer/tokenization_roformer.py +77 -412
- transformers/models/rt_detr/modeling_rt_detr.py +2 -0
- transformers/models/rt_detr/modeling_rt_detr_resnet.py +5 -1
- transformers/models/rt_detr_v2/modeling_rt_detr_v2.py +2 -0
- transformers/models/rwkv/modeling_rwkv.py +1 -0
- transformers/models/sam2/modeling_sam2.py +2 -2
- transformers/models/sam2/modular_sam2.py +2 -2
- transformers/models/sam2_video/modeling_sam2_video.py +1 -0
- transformers/models/sam2_video/modular_sam2_video.py +1 -0
- transformers/models/sam3/modeling_sam3.py +77 -80
- transformers/models/sam3_tracker/modeling_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker/modular_sam3_tracker.py +6 -1
- transformers/models/sam3_tracker_video/modeling_sam3_tracker_video.py +1 -0
- transformers/models/sam3_video/modeling_sam3_video.py +1 -0
- transformers/models/seamless_m4t/modeling_seamless_m4t.py +5 -1
- transformers/models/seamless_m4t/tokenization_seamless_m4t.py +27 -59
- transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py +5 -1
- transformers/models/seed_oss/modeling_seed_oss.py +2 -2
- transformers/models/segformer/modeling_segformer.py +4 -1
- transformers/models/seggpt/modeling_seggpt.py +2 -0
- transformers/models/sew/modeling_sew.py +3 -0
- transformers/models/sew/modular_sew.py +1 -0
- transformers/models/sew_d/modeling_sew_d.py +3 -0
- transformers/models/siglip2/modeling_siglip2.py +4 -0
- transformers/models/siglip2/modular_siglip2.py +4 -0
- transformers/models/smollm3/modeling_smollm3.py +4 -4
- transformers/models/smolvlm/processing_smolvlm.py +0 -7
- transformers/models/speech_to_text/modeling_speech_to_text.py +4 -0
- transformers/models/speecht5/modeling_speecht5.py +13 -1
- transformers/models/splinter/modeling_splinter.py +3 -0
- transformers/models/splinter/tokenization_splinter.py +9 -28
- transformers/models/squeezebert/modeling_squeezebert.py +6 -0
- transformers/models/stablelm/modeling_stablelm.py +3 -1
- transformers/models/starcoder2/modeling_starcoder2.py +4 -3
- transformers/models/superglue/modeling_superglue.py +1 -0
- transformers/models/superpoint/modeling_superpoint.py +1 -0
- transformers/models/swiftformer/modeling_swiftformer.py +2 -0
- transformers/models/swin/modeling_swin.py +4 -0
- transformers/models/swin2sr/modeling_swin2sr.py +2 -0
- transformers/models/swinv2/modeling_swinv2.py +4 -0
- transformers/models/t5/modeling_t5.py +7 -0
- transformers/models/t5/tokenization_t5.py +4 -8
- transformers/models/t5gemma/modeling_t5gemma.py +5 -5
- transformers/models/t5gemma2/modeling_t5gemma2.py +6 -6
- transformers/models/table_transformer/modeling_table_transformer.py +4 -0
- transformers/models/tapas/modeling_tapas.py +3 -0
- transformers/models/textnet/modeling_textnet.py +11 -2
- transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -0
- transformers/models/timesfm/modeling_timesfm.py +2 -0
- transformers/models/timesfm/modular_timesfm.py +2 -0
- transformers/models/timesformer/modeling_timesformer.py +2 -0
- transformers/models/timm_wrapper/modeling_timm_wrapper.py +1 -1
- transformers/models/trocr/modeling_trocr.py +2 -0
- transformers/models/tvp/modeling_tvp.py +2 -0
- transformers/models/udop/modeling_udop.py +4 -0
- transformers/models/udop/tokenization_udop.py +5 -13
- transformers/models/umt5/modeling_umt5.py +7 -0
- transformers/models/unispeech/modeling_unispeech.py +4 -0
- transformers/models/unispeech/modular_unispeech.py +2 -0
- transformers/models/unispeech_sat/modeling_unispeech_sat.py +6 -0
- transformers/models/unispeech_sat/modular_unispeech_sat.py +2 -0
- transformers/models/univnet/modeling_univnet.py +1 -0
- transformers/models/upernet/modeling_upernet.py +1 -0
- transformers/models/vaultgemma/modeling_vaultgemma.py +4 -4
- transformers/models/vilt/modeling_vilt.py +6 -0
- transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +1 -0
- transformers/models/visual_bert/modeling_visual_bert.py +6 -0
- transformers/models/vitdet/modeling_vitdet.py +2 -0
- transformers/models/vitmatte/modeling_vitmatte.py +1 -0
- transformers/models/vits/modeling_vits.py +1 -0
- transformers/models/vjepa2/modeling_vjepa2.py +1 -0
- transformers/models/wav2vec2/modeling_wav2vec2.py +7 -0
- transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_bert/modular_wav2vec2_bert.py +5 -0
- transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +6 -0
- transformers/models/wavlm/modeling_wavlm.py +5 -0
- transformers/models/whisper/modeling_whisper.py +6 -0
- transformers/models/whisper/tokenization_whisper.py +4 -15
- transformers/models/x_clip/modeling_x_clip.py +3 -0
- transformers/models/xglm/modeling_xglm.py +1 -0
- transformers/models/xglm/tokenization_xglm.py +4 -9
- transformers/models/xlm/modeling_xlm.py +5 -0
- transformers/models/xlm_roberta/tokenization_xlm_roberta.py +9 -16
- transformers/models/xlnet/tokenization_xlnet.py +3 -7
- transformers/models/yoso/modeling_yoso.py +6 -0
- transformers/models/zamba/modeling_zamba.py +2 -0
- transformers/models/zamba2/modeling_zamba2.py +4 -2
- transformers/models/zamba2/modular_zamba2.py +1 -1
- transformers/models/zoedepth/modeling_zoedepth.py +1 -0
- transformers/pipelines/__init__.py +2 -3
- transformers/pipelines/base.py +1 -9
- transformers/pipelines/document_question_answering.py +3 -1
- transformers/pipelines/text_generation.py +1 -1
- transformers/processing_utils.py +23 -11
- transformers/quantizers/base.py +35 -110
- transformers/quantizers/quantizer_aqlm.py +1 -5
- transformers/quantizers/quantizer_auto_round.py +1 -2
- transformers/quantizers/quantizer_awq.py +17 -81
- transformers/quantizers/quantizer_bitnet.py +3 -8
- transformers/quantizers/quantizer_bnb_4bit.py +13 -110
- transformers/quantizers/quantizer_bnb_8bit.py +16 -92
- transformers/quantizers/quantizer_compressed_tensors.py +1 -5
- transformers/quantizers/quantizer_eetq.py +14 -62
- transformers/quantizers/quantizer_fbgemm_fp8.py +34 -125
- transformers/quantizers/quantizer_finegrained_fp8.py +13 -105
- transformers/quantizers/quantizer_fp_quant.py +48 -78
- transformers/quantizers/quantizer_gptq.py +7 -24
- transformers/quantizers/quantizer_higgs.py +40 -54
- transformers/quantizers/quantizer_hqq.py +144 -153
- transformers/quantizers/quantizer_mxfp4.py +13 -167
- transformers/quantizers/quantizer_quanto.py +20 -64
- transformers/quantizers/quantizer_quark.py +36 -17
- transformers/quantizers/quantizer_spqr.py +1 -4
- transformers/quantizers/quantizer_torchao.py +23 -202
- transformers/quantizers/quantizer_vptq.py +8 -22
- transformers/quantizers/quantizers_utils.py +20 -0
- transformers/testing_utils.py +297 -36
- transformers/tokenization_mistral_common.py +4 -0
- transformers/tokenization_utils_base.py +113 -222
- transformers/tokenization_utils_tokenizers.py +168 -107
- transformers/trainer.py +28 -31
- transformers/trainer_jit_checkpoint.py +126 -0
- transformers/trainer_utils.py +1 -1
- transformers/training_args.py +66 -28
- transformers/utils/__init__.py +3 -4
- transformers/utils/auto_docstring.py +1 -0
- transformers/utils/generic.py +27 -1
- transformers/utils/hub.py +5 -15
- transformers/utils/import_utils.py +61 -16
- transformers/utils/kernel_config.py +4 -2
- transformers/utils/loading_report.py +19 -10
- transformers/utils/quantization_config.py +75 -242
- transformers/video_processing_utils.py +1 -2
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/METADATA +274 -227
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/RECORD +536 -520
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/WHEEL +1 -1
- transformers/kernels/__init__.py +0 -0
- transformers/kernels/falcon_mamba/selective_scan_with_ln_interface.py +0 -529
- transformers/models/roformer/tokenization_roformer_fast.py +0 -160
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/entry_points.txt +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info/licenses}/LICENSE +0 -0
- {transformers-5.0.0rc0.dist-info → transformers-5.0.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -31,7 +31,7 @@ from ... import initialization as init
|
|
|
31
31
|
from ...activations import ACT2FN
|
|
32
32
|
from ...cache_utils import Cache, DynamicCache
|
|
33
33
|
from ...generation import GenerationMixin
|
|
34
|
-
from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
|
|
34
|
+
from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
|
|
35
35
|
from ...masking_utils import create_bidirectional_mask, create_causal_mask
|
|
36
36
|
from ...modeling_layers import GradientCheckpointingLayer
|
|
37
37
|
from ...modeling_outputs import (
|
|
@@ -45,7 +45,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
|
|
|
45
45
|
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
46
46
|
from ...processing_utils import Unpack
|
|
47
47
|
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
|
|
48
|
-
from ...utils.generic import OutputRecorder, check_model_inputs
|
|
48
|
+
from ...utils.generic import OutputRecorder, check_model_inputs, maybe_autocast
|
|
49
49
|
from .configuration_evolla import EvollaConfig, SaProtConfig
|
|
50
50
|
|
|
51
51
|
|
|
@@ -1019,7 +1019,7 @@ class EvollaRotaryEmbedding(nn.Module):
|
|
|
1019
1019
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
1020
1020
|
|
|
1021
1021
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
1022
|
-
with torch.autocast(device_type=device_type, enabled=False): # Force float32
|
|
1022
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
1023
1023
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
1024
1024
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
1025
1025
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -1091,6 +1091,7 @@ def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
|
|
|
1091
1091
|
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
|
|
1092
1092
|
|
|
1093
1093
|
|
|
1094
|
+
@use_kernelized_func(apply_rotary_pos_emb)
|
|
1094
1095
|
class EvollaAttention(nn.Module):
|
|
1095
1096
|
"""Multi-headed attention from 'Attention Is All You Need' paper"""
|
|
1096
1097
|
|
|
@@ -1116,7 +1117,6 @@ class EvollaAttention(nn.Module):
|
|
|
1116
1117
|
self.o_proj = nn.Linear(
|
|
1117
1118
|
config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
|
|
1118
1119
|
)
|
|
1119
|
-
self.rotary_fn = apply_rotary_pos_emb
|
|
1120
1120
|
|
|
1121
1121
|
def forward(
|
|
1122
1122
|
self,
|
|
@@ -44,6 +44,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
|
|
|
44
44
|
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
45
45
|
from ...processing_utils import Unpack
|
|
46
46
|
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple
|
|
47
|
+
from ...utils.generic import maybe_autocast
|
|
47
48
|
from .configuration_exaone4 import Exaone4Config
|
|
48
49
|
|
|
49
50
|
|
|
@@ -124,7 +125,7 @@ class Exaone4RotaryEmbedding(nn.Module):
|
|
|
124
125
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
125
126
|
|
|
126
127
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
127
|
-
with torch.autocast(device_type=device_type, enabled=False): # Force float32
|
|
128
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
128
129
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
129
130
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
130
131
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -239,7 +240,6 @@ class Exaone4Attention(nn.Module):
|
|
|
239
240
|
attention_mask: Optional[torch.Tensor] = None,
|
|
240
241
|
past_key_values: Optional[Cache] = None,
|
|
241
242
|
cache_position: Optional[torch.LongTensor] = None,
|
|
242
|
-
position_ids: Optional[torch.LongTensor] = None,
|
|
243
243
|
**kwargs: Unpack[TransformersKwargs],
|
|
244
244
|
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
|
|
245
245
|
input_shape = hidden_states.shape[:-1]
|
|
@@ -260,7 +260,6 @@ class Exaone4Attention(nn.Module):
|
|
|
260
260
|
attention_mask: Optional[torch.Tensor] = None,
|
|
261
261
|
past_key_values: Optional[Cache] = None,
|
|
262
262
|
cache_position: Optional[torch.LongTensor] = None,
|
|
263
|
-
position_ids: Optional[torch.LongTensor] = None,
|
|
264
263
|
**kwargs: Unpack[TransformersKwargs],
|
|
265
264
|
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
|
|
266
265
|
input_shape = hidden_states.shape[:-1]
|
|
@@ -48,6 +48,7 @@ from ...utils import (
|
|
|
48
48
|
auto_docstring,
|
|
49
49
|
logging,
|
|
50
50
|
)
|
|
51
|
+
from ...utils.generic import maybe_autocast
|
|
51
52
|
from .configuration_falcon import FalconConfig
|
|
52
53
|
|
|
53
54
|
|
|
@@ -160,7 +161,7 @@ class FalconRotaryEmbedding(nn.Module):
|
|
|
160
161
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
161
162
|
|
|
162
163
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
163
|
-
with torch.autocast(device_type=device_type, enabled=False): # Force float32
|
|
164
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
164
165
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
165
166
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
166
167
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -739,6 +740,7 @@ class FalconModel(FalconPreTrainedModel):
|
|
|
739
740
|
output_hidden_states: Optional[bool] = None,
|
|
740
741
|
return_dict: Optional[bool] = None,
|
|
741
742
|
cache_position: Optional[torch.LongTensor] = None,
|
|
743
|
+
**kwargs,
|
|
742
744
|
) -> Union[tuple[torch.Tensor, ...], BaseModelOutputWithPastAndCrossAttentions]:
|
|
743
745
|
r"""
|
|
744
746
|
input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
|
|
@@ -1119,6 +1121,7 @@ class FalconForSequenceClassification(FalconPreTrainedModel):
|
|
|
1119
1121
|
output_attentions: Optional[bool] = None,
|
|
1120
1122
|
output_hidden_states: Optional[bool] = None,
|
|
1121
1123
|
return_dict: Optional[bool] = None,
|
|
1124
|
+
**kwargs,
|
|
1122
1125
|
) -> Union[tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
|
|
1123
1126
|
r"""
|
|
1124
1127
|
input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
|
|
@@ -1243,6 +1246,7 @@ class FalconForTokenClassification(FalconPreTrainedModel):
|
|
|
1243
1246
|
output_attentions: Optional[bool] = None,
|
|
1244
1247
|
output_hidden_states: Optional[bool] = None,
|
|
1245
1248
|
return_dict: Optional[bool] = None,
|
|
1249
|
+
**kwargs,
|
|
1246
1250
|
) -> Union[tuple[torch.Tensor], TokenClassifierOutput]:
|
|
1247
1251
|
r"""
|
|
1248
1252
|
input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
|
|
@@ -1320,6 +1324,7 @@ class FalconForQuestionAnswering(FalconPreTrainedModel):
|
|
|
1320
1324
|
output_attentions: Optional[bool] = None,
|
|
1321
1325
|
output_hidden_states: Optional[bool] = None,
|
|
1322
1326
|
return_dict: Optional[bool] = None,
|
|
1327
|
+
**kwargs,
|
|
1323
1328
|
) -> Union[tuple, QuestionAnsweringModelOutput]:
|
|
1324
1329
|
r"""
|
|
1325
1330
|
input_ids (`torch.LongTensor` of shape `(batch_size, input_ids_length)`):
|
|
@@ -36,7 +36,7 @@ from transformers.activations import ACT2FN
|
|
|
36
36
|
from ... import initialization as init
|
|
37
37
|
from ...cache_utils import Cache
|
|
38
38
|
from ...generation import GenerationMixin
|
|
39
|
-
from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub
|
|
39
|
+
from ...integrations import use_kernel_forward_from_hub, use_kernel_func_from_hub, use_kernelized_func
|
|
40
40
|
from ...modeling_attn_mask_utils import AttentionMaskConverter
|
|
41
41
|
from ...modeling_flash_attention_utils import FlashAttentionKwargs
|
|
42
42
|
from ...modeling_layers import GradientCheckpointingLayer
|
|
@@ -45,6 +45,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
|
|
|
45
45
|
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
|
|
46
46
|
from ...processing_utils import Unpack
|
|
47
47
|
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging
|
|
48
|
+
from ...utils.generic import maybe_autocast
|
|
48
49
|
from ...utils.import_utils import is_causal_conv1d_available, is_mamba_2_ssm_available
|
|
49
50
|
from .configuration_falcon_h1 import FalconH1Config
|
|
50
51
|
|
|
@@ -279,7 +280,7 @@ class FalconH1RotaryEmbedding(nn.Module):
|
|
|
279
280
|
position_ids_expanded = position_ids[:, None, :].float()
|
|
280
281
|
|
|
281
282
|
device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
|
|
282
|
-
with torch.autocast(device_type=device_type, enabled=False): # Force float32
|
|
283
|
+
with maybe_autocast(device_type=device_type, enabled=False): # Force float32
|
|
283
284
|
freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
|
|
284
285
|
emb = torch.cat((freqs, freqs), dim=-1)
|
|
285
286
|
cos = emb.cos() * self.attention_scaling
|
|
@@ -361,6 +362,7 @@ def eager_attention_forward(
|
|
|
361
362
|
return attn_output, attn_weights
|
|
362
363
|
|
|
363
364
|
|
|
365
|
+
@use_kernelized_func(apply_rotary_pos_emb)
|
|
364
366
|
class FalconH1Attention(nn.Module):
|
|
365
367
|
"""Multi-headed attention from 'Attention Is All You Need' paper"""
|
|
366
368
|
|
|
@@ -386,7 +388,6 @@ class FalconH1Attention(nn.Module):
|
|
|
386
388
|
self.o_proj = nn.Linear(
|
|
387
389
|
config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
|
|
388
390
|
)
|
|
389
|
-
self.rotary_fn = apply_rotary_pos_emb
|
|
390
391
|
self.key_multiplier = config.key_multiplier
|
|
391
392
|
|
|
392
393
|
def forward(
|
|
@@ -35,11 +35,7 @@ from ...integrations.hub_kernels import lazy_load_kernel
|
|
|
35
35
|
from ...modeling_layers import GradientCheckpointingLayer
|
|
36
36
|
from ...modeling_utils import PreTrainedModel
|
|
37
37
|
from ...utils import ModelOutput, auto_docstring, logging
|
|
38
|
-
from ...utils.import_utils import (
|
|
39
|
-
is_mamba_ssm_available,
|
|
40
|
-
is_mambapy_available,
|
|
41
|
-
is_torchdynamo_compiling,
|
|
42
|
-
)
|
|
38
|
+
from ...utils.import_utils import is_mambapy_available, is_torchdynamo_compiling
|
|
43
39
|
from .configuration_falcon_mamba import FalconMambaConfig
|
|
44
40
|
|
|
45
41
|
|
|
@@ -48,14 +44,6 @@ if is_mambapy_available():
|
|
|
48
44
|
else:
|
|
49
45
|
pscan = None
|
|
50
46
|
|
|
51
|
-
if is_mamba_ssm_available():
|
|
52
|
-
from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
|
|
53
|
-
from mamba_ssm.ops.triton.selective_state_update import selective_state_update
|
|
54
|
-
|
|
55
|
-
from ...kernels.falcon_mamba import mamba_inner_fn
|
|
56
|
-
else:
|
|
57
|
-
selective_state_update, selective_scan_fn, mamba_inner_fn = None, None, None
|
|
58
|
-
|
|
59
47
|
|
|
60
48
|
logger = logging.get_logger(__name__)
|
|
61
49
|
|
|
@@ -231,7 +219,27 @@ class FalconMambaMixer(nn.Module):
|
|
|
231
219
|
self.out_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=config.use_bias)
|
|
232
220
|
self.use_bias = config.use_bias
|
|
233
221
|
|
|
222
|
+
global causal_conv1d, causal_conv1d_update, causal_conv1d_fn
|
|
223
|
+
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
224
|
+
causal_conv1d_update, causal_conv1d_fn = (
|
|
225
|
+
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
226
|
+
if causal_conv1d is not None
|
|
227
|
+
else (None, None)
|
|
228
|
+
)
|
|
229
|
+
global falcon_mamba_ssm, selective_state_update, selective_scan_fn, falcon_mamba_inner_fn
|
|
230
|
+
falcon_mamba_ssm = lazy_load_kernel("falcon_mamba-ssm")
|
|
231
|
+
selective_state_update, selective_scan_fn, falcon_mamba_inner_fn = (
|
|
232
|
+
(
|
|
233
|
+
falcon_mamba_ssm.selective_state_update,
|
|
234
|
+
falcon_mamba_ssm.selective_scan_fn,
|
|
235
|
+
falcon_mamba_ssm.falcon_mamba_inner_fn,
|
|
236
|
+
)
|
|
237
|
+
if falcon_mamba_ssm is not None
|
|
238
|
+
else (None, None, None)
|
|
239
|
+
)
|
|
240
|
+
|
|
234
241
|
self.warn_slow_implementation()
|
|
242
|
+
|
|
235
243
|
# Triton expects to pass RMS weights even if they are non learnable, thus we need to create these weights here
|
|
236
244
|
self.register_buffer(
|
|
237
245
|
"b_c_rms", torch.nn.Parameter(torch.ones(self.ssm_state_size), requires_grad=False), persistent=False
|
|
@@ -242,14 +250,8 @@ class FalconMambaMixer(nn.Module):
|
|
|
242
250
|
self.rms_eps = config.mixer_rms_eps
|
|
243
251
|
|
|
244
252
|
def warn_slow_implementation(self):
|
|
245
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
246
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
247
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
248
|
-
if causal_conv1d is not None
|
|
249
|
-
else (None, None)
|
|
250
|
-
)
|
|
251
253
|
is_fast_path_available = all(
|
|
252
|
-
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
|
|
254
|
+
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
|
|
253
255
|
)
|
|
254
256
|
if not is_fast_path_available:
|
|
255
257
|
if self.use_falcon_mambapy:
|
|
@@ -279,9 +281,8 @@ class FalconMambaMixer(nn.Module):
|
|
|
279
281
|
):
|
|
280
282
|
# 1. Gated MLP's linear projection
|
|
281
283
|
projected_states = self.in_proj(hidden_states).transpose(1, 2)
|
|
282
|
-
|
|
283
284
|
if self.training and cache_params is None: # Doesn't support outputting the states -> used for training
|
|
284
|
-
contextualized_states = mamba_inner_fn(
|
|
285
|
+
contextualized_states = falcon_mamba_inner_fn(
|
|
285
286
|
projected_states,
|
|
286
287
|
self.conv1d.weight,
|
|
287
288
|
self.conv1d.bias if self.use_conv_bias else None,
|
|
@@ -302,12 +303,6 @@ class FalconMambaMixer(nn.Module):
|
|
|
302
303
|
)
|
|
303
304
|
|
|
304
305
|
else:
|
|
305
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
306
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
307
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
308
|
-
if causal_conv1d is not None
|
|
309
|
-
else (None, None)
|
|
310
|
-
)
|
|
311
306
|
hidden_states, gate = projected_states.chunk(2, dim=1)
|
|
312
307
|
|
|
313
308
|
if attention_mask is not None:
|
|
@@ -502,14 +497,8 @@ class FalconMambaMixer(nn.Module):
|
|
|
502
497
|
cache_position: Optional[torch.LongTensor] = None,
|
|
503
498
|
attention_mask: Optional[torch.LongTensor] = None,
|
|
504
499
|
):
|
|
505
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
506
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
507
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
508
|
-
if causal_conv1d is not None
|
|
509
|
-
else (None, None)
|
|
510
|
-
)
|
|
511
500
|
is_fast_path_available = all(
|
|
512
|
-
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
|
|
501
|
+
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
|
|
513
502
|
)
|
|
514
503
|
if is_fast_path_available and "cuda" in self.x_proj.weight.device.type and not is_torchdynamo_compiling():
|
|
515
504
|
return self.cuda_kernels_forward(hidden_states, cache_params, cache_position, attention_mask)
|
|
@@ -703,6 +692,7 @@ class FalconMambaModel(FalconMambaPreTrainedModel):
|
|
|
703
692
|
return_dict: Optional[bool] = None,
|
|
704
693
|
cache_position: Optional[torch.LongTensor] = None,
|
|
705
694
|
attention_mask: Optional[torch.LongTensor] = None,
|
|
695
|
+
**kwargs,
|
|
706
696
|
) -> Union[tuple, FalconMambaOutput]:
|
|
707
697
|
r"""
|
|
708
698
|
cache_params (`FalconMambaCache`, *optional*):
|
|
@@ -19,9 +19,8 @@ from typing import Optional
|
|
|
19
19
|
import torch
|
|
20
20
|
from torch import nn
|
|
21
21
|
|
|
22
|
-
from ...integrations.hub_kernels import lazy_load_kernel
|
|
23
22
|
from ...utils import auto_docstring, logging
|
|
24
|
-
from ...utils.import_utils import
|
|
23
|
+
from ...utils.import_utils import is_mambapy_available, is_torchdynamo_compiling
|
|
25
24
|
from ..mamba.configuration_mamba import MambaConfig
|
|
26
25
|
from ..mamba.modeling_mamba import (
|
|
27
26
|
MambaBlock,
|
|
@@ -43,13 +42,13 @@ if is_mambapy_available():
|
|
|
43
42
|
else:
|
|
44
43
|
pscan = None
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
45
|
+
selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn = (
|
|
46
|
+
None,
|
|
47
|
+
None,
|
|
48
|
+
None,
|
|
49
|
+
None,
|
|
50
|
+
None,
|
|
51
|
+
)
|
|
53
52
|
|
|
54
53
|
|
|
55
54
|
class FalconMambaConfig(MambaConfig):
|
|
@@ -251,14 +250,8 @@ def rms_forward(hidden_states, variance_epsilon=1e-6):
|
|
|
251
250
|
|
|
252
251
|
class FalconMambaMixer(MambaMixer):
|
|
253
252
|
def warn_slow_implementation(self):
|
|
254
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
255
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
256
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
257
|
-
if causal_conv1d is not None
|
|
258
|
-
else (None, None)
|
|
259
|
-
)
|
|
260
253
|
is_fast_path_available = all(
|
|
261
|
-
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
|
|
254
|
+
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
|
|
262
255
|
)
|
|
263
256
|
if not is_fast_path_available:
|
|
264
257
|
if self.use_falcon_mambapy:
|
|
@@ -281,6 +274,7 @@ class FalconMambaMixer(MambaMixer):
|
|
|
281
274
|
|
|
282
275
|
def __init__(self, config: FalconMambaConfig, layer_idx: int):
|
|
283
276
|
super().__init__(config, layer_idx)
|
|
277
|
+
|
|
284
278
|
# Triton expects to pass RMS weights even if they are non learnable, thus we need to create these weights here
|
|
285
279
|
self.register_buffer(
|
|
286
280
|
"b_c_rms", torch.nn.Parameter(torch.ones(self.ssm_state_size), requires_grad=False), persistent=False
|
|
@@ -299,9 +293,8 @@ class FalconMambaMixer(MambaMixer):
|
|
|
299
293
|
):
|
|
300
294
|
# 1. Gated MLP's linear projection
|
|
301
295
|
projected_states = self.in_proj(hidden_states).transpose(1, 2)
|
|
302
|
-
|
|
303
296
|
if self.training and cache_params is None: # Doesn't support outputting the states -> used for training
|
|
304
|
-
contextualized_states = mamba_inner_fn(
|
|
297
|
+
contextualized_states = falcon_mamba_inner_fn(
|
|
305
298
|
projected_states,
|
|
306
299
|
self.conv1d.weight,
|
|
307
300
|
self.conv1d.bias if self.use_conv_bias else None,
|
|
@@ -322,12 +315,6 @@ class FalconMambaMixer(MambaMixer):
|
|
|
322
315
|
)
|
|
323
316
|
|
|
324
317
|
else:
|
|
325
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
326
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
327
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
328
|
-
if causal_conv1d is not None
|
|
329
|
-
else (None, None)
|
|
330
|
-
)
|
|
331
318
|
hidden_states, gate = projected_states.chunk(2, dim=1)
|
|
332
319
|
|
|
333
320
|
if attention_mask is not None:
|
|
@@ -521,14 +508,8 @@ class FalconMambaMixer(MambaMixer):
|
|
|
521
508
|
cache_position: Optional[torch.LongTensor] = None,
|
|
522
509
|
attention_mask: Optional[torch.LongTensor] = None,
|
|
523
510
|
):
|
|
524
|
-
causal_conv1d = lazy_load_kernel("causal-conv1d")
|
|
525
|
-
causal_conv1d_update, causal_conv1d_fn = (
|
|
526
|
-
(causal_conv1d.causal_conv1d_update, causal_conv1d.causal_conv1d_fn)
|
|
527
|
-
if causal_conv1d is not None
|
|
528
|
-
else (None, None)
|
|
529
|
-
)
|
|
530
511
|
is_fast_path_available = all(
|
|
531
|
-
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)
|
|
512
|
+
(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, falcon_mamba_inner_fn)
|
|
532
513
|
)
|
|
533
514
|
if is_fast_path_available and "cuda" in self.x_proj.weight.device.type and not is_torchdynamo_compiling():
|
|
534
515
|
return self.cuda_kernels_forward(hidden_states, cache_params, cache_position, attention_mask)
|
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright 2024 Tri Dao, Albert Gu, Technological Innovation Institute and HuggingFace Inc. team.
|
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
|
3
2
|
#
|
|
4
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,4 +11,17 @@
|
|
|
12
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
12
|
# See the License for the specific language governing permissions and
|
|
14
13
|
# limitations under the License.
|
|
15
|
-
from
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
from ...utils import _LazyModule
|
|
17
|
+
from ...utils.import_utils import define_import_structure
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from .configuration_fast_vlm import *
|
|
22
|
+
from .modeling_fast_vlm import *
|
|
23
|
+
else:
|
|
24
|
+
import sys
|
|
25
|
+
|
|
26
|
+
_file = globals()["__file__"]
|
|
27
|
+
sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
2
|
+
# This file was automatically generated from src/transformers/models/fast_vlm/modular_fast_vlm.py.
|
|
3
|
+
# Do NOT edit this file manually as any edits will be overwritten by the generation of
|
|
4
|
+
# the file from the modular. If any change should be done, please apply the change to the
|
|
5
|
+
# modular_fast_vlm.py file directly. One of our CI enforces this.
|
|
6
|
+
# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨
|
|
7
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
|
8
|
+
#
|
|
9
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
+
# you may not use this file except in compliance with the License.
|
|
11
|
+
# You may obtain a copy of the License at
|
|
12
|
+
#
|
|
13
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
+
#
|
|
15
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
# See the License for the specific language governing permissions and
|
|
19
|
+
# limitations under the License.
|
|
20
|
+
|
|
21
|
+
from ...configuration_utils import PreTrainedConfig
|
|
22
|
+
from ..auto import CONFIG_MAPPING, AutoConfig
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FastVlmConfig(PreTrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`FastVlmForConditionalGeneration`]. It is used to instantiate a
    FastVLM model according to the specified arguments, defining the model architecture. Instantiating a configuration
    with the defaults will yield the same configuration as the one of FastVLM-7B.

    e.g. [KamilaMila/FastVLM-7B](https://huggingface.co/KamilaMila/FastVLM-7B)

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        vision_config (`Union[AutoConfig, dict]`, *optional*, defaults to `TimmWrapperConfig` for `fastvit_mci3`):
            The config object or dictionary of the vision backbone. A dictionary without a `"model_type"` key is
            treated as a `"timm_wrapper"` config. The dictionary passed in is not modified.
        text_config (`Union[AutoConfig, dict]`, *optional*, defaults to `Qwen2Config`):
            The config object or dictionary of the text backbone. A dictionary without a `"model_type"` key is
            treated as a `"qwen2"` config. The dictionary passed in is not modified.
        image_token_id (`int`, *optional*, defaults to 151646):
            The image token index to encode the image prompt.
        projector_hidden_act (`str`, *optional*, defaults to `"gelu"`):
            The activation function used by the multimodal projector.
        vision_feature_select_strategy (`str`, *optional*, defaults to `"full"`):
            The feature selection strategy used to select the vision feature from the vision backbone.
            Only "full" supported.
        vision_feature_layer (`Union[int, list[int]]`, *optional*, defaults to -1):
            The index of the layer to select the vision feature. If multiple indices are provided,
            the vision feature of the corresponding indices will be concatenated to form the
            vision features. Only -1 supported.
        multimodal_projector_bias (`bool`, *optional*, defaults to `True`):
            Whether to use bias in the multimodal projector.

    Raises:
        ValueError: If `vision_feature_select_strategy` is not `"full"` or `vision_feature_layer` is not `-1`.

    Example:

    ```python
    >>> from transformers import FastVlmForConditionalGeneration, FastVlmConfig

    >>> # Initializing a FastVLM-7B style configuration
    >>> configuration = FastVlmConfig()

    >>> # Initializing a model from the FastVLM-7B style configuration
    >>> model = FastVlmForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "fast_vlm"
    # NOTE(review): presumably consumed by the base config class to alias `image_token_id`
    # to `image_token_index` -- confirm against `PreTrainedConfig`.
    attribute_map = {
        "image_token_id": "image_token_index",
    }
    sub_configs = {"text_config": AutoConfig, "vision_config": AutoConfig}

    def __init__(
        self,
        vision_config=None,
        text_config=None,
        image_token_id=151646,
        projector_hidden_act="gelu",
        vision_feature_select_strategy="full",
        vision_feature_layer=-1,
        multimodal_projector_bias=True,
        **kwargs,
    ):
        self.image_token_id = image_token_id
        self.projector_hidden_act = projector_hidden_act

        # FastVLM only supports a single feature-selection mode; reject anything else up front.
        if vision_feature_select_strategy != "full":
            raise ValueError(
                f"Unexpected select feature strategy: {vision_feature_select_strategy}. Only 'full' is supported in FastVLM."
            )

        if vision_feature_layer != -1:
            raise ValueError(
                f"Unexpected vision feature layer: {vision_feature_layer}. Only -1 is supported in FastVLM."
            )

        self.vision_feature_select_strategy = vision_feature_select_strategy
        self.vision_feature_layer = vision_feature_layer

        if isinstance(vision_config, dict):
            # Work on a shallow copy so the default "model_type" is not written into the caller's dict.
            vision_config = dict(vision_config)
            vision_config.setdefault("model_type", "timm_wrapper")
            vision_config = CONFIG_MAPPING[vision_config["model_type"]](**vision_config)
        elif vision_config is None:
            vision_config = CONFIG_MAPPING["timm_wrapper"](
                architecture="fastvit_mci3",
                do_pooling=True,
                global_pool="avg",
                hidden_size=3072,
                initializer_range=0.02,
                model_args={"inference_mode": True},
            )

        self.vision_config = vision_config

        if isinstance(text_config, dict):
            # Same as above: copy before filling in the default backbone type.
            text_config = dict(text_config)
            text_config.setdefault("model_type", "qwen2")
            text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
        elif text_config is None:
            text_config = CONFIG_MAPPING["qwen2"](
                hidden_size=3584,
                vocab_size=152128,
                intermediate_size=18944,
                num_attention_heads=28,
                num_key_value_heads=4,
                num_hidden_layers=28,
            )

        self.text_config = text_config
        self.multimodal_projector_bias = multimodal_projector_bias

        # Remaining keyword arguments are forwarded to the base configuration class.
        super().__init__(**kwargs)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
__all__ = ["FastVlmConfig"]
|