xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -205,8 +205,8 @@
|
|
|
205
205
|
"8-bit",
|
|
206
206
|
"none"
|
|
207
207
|
],
|
|
208
|
-
"model_id": "THUDM/glm-4-9b-chat",
|
|
209
|
-
"model_revision": "
|
|
208
|
+
"model_id": "THUDM/glm-4-9b-chat-hf",
|
|
209
|
+
"model_revision": "c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd"
|
|
210
210
|
},
|
|
211
211
|
{
|
|
212
212
|
"model_format": "ggufv2",
|
|
@@ -269,8 +269,8 @@
|
|
|
269
269
|
"8-bit",
|
|
270
270
|
"none"
|
|
271
271
|
],
|
|
272
|
-
"model_id": "THUDM/glm-4-9b-chat-1m",
|
|
273
|
-
"model_revision": "
|
|
272
|
+
"model_id": "THUDM/glm-4-9b-chat-1m-hf",
|
|
273
|
+
"model_revision": "0588cb62942f0f0a5545c695e5c1b019d64eabdc"
|
|
274
274
|
},
|
|
275
275
|
{
|
|
276
276
|
"model_format": "ggufv2",
|
|
@@ -952,7 +952,7 @@
|
|
|
952
952
|
"model_format": "mlx",
|
|
953
953
|
"model_size_in_billions": 8,
|
|
954
954
|
"quantizations": [
|
|
955
|
-
"4-bit"
|
|
955
|
+
"4bit"
|
|
956
956
|
],
|
|
957
957
|
"model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
|
|
958
958
|
},
|
|
@@ -960,7 +960,7 @@
|
|
|
960
960
|
"model_format": "mlx",
|
|
961
961
|
"model_size_in_billions": 8,
|
|
962
962
|
"quantizations": [
|
|
963
|
-
"8-bit"
|
|
963
|
+
"8bit"
|
|
964
964
|
],
|
|
965
965
|
"model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
|
|
966
966
|
},
|
|
@@ -976,7 +976,7 @@
|
|
|
976
976
|
"model_format": "mlx",
|
|
977
977
|
"model_size_in_billions": 70,
|
|
978
978
|
"quantizations": [
|
|
979
|
-
"4-bit"
|
|
979
|
+
"4bit"
|
|
980
980
|
],
|
|
981
981
|
"model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
|
|
982
982
|
},
|
|
@@ -984,7 +984,7 @@
|
|
|
984
984
|
"model_format": "mlx",
|
|
985
985
|
"model_size_in_billions": 70,
|
|
986
986
|
"quantizations": [
|
|
987
|
-
"8-bit"
|
|
987
|
+
"8bit"
|
|
988
988
|
],
|
|
989
989
|
"model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
|
|
990
990
|
},
|
|
@@ -1229,7 +1229,7 @@
|
|
|
1229
1229
|
"model_format": "mlx",
|
|
1230
1230
|
"model_size_in_billions": 8,
|
|
1231
1231
|
"quantizations": [
|
|
1232
|
-
"4-bit"
|
|
1232
|
+
"4bit"
|
|
1233
1233
|
],
|
|
1234
1234
|
"model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
|
|
1235
1235
|
},
|
|
@@ -1237,7 +1237,7 @@
|
|
|
1237
1237
|
"model_format": "mlx",
|
|
1238
1238
|
"model_size_in_billions": 8,
|
|
1239
1239
|
"quantizations": [
|
|
1240
|
-
"8-bit"
|
|
1240
|
+
"8bit"
|
|
1241
1241
|
],
|
|
1242
1242
|
"model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
|
|
1243
1243
|
},
|
|
@@ -1253,7 +1253,7 @@
|
|
|
1253
1253
|
"model_format": "mlx",
|
|
1254
1254
|
"model_size_in_billions": 70,
|
|
1255
1255
|
"quantizations": [
|
|
1256
|
-
"4-bit"
|
|
1256
|
+
"4bit"
|
|
1257
1257
|
],
|
|
1258
1258
|
"model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
|
|
1259
1259
|
},
|
|
@@ -1261,7 +1261,7 @@
|
|
|
1261
1261
|
"model_format": "mlx",
|
|
1262
1262
|
"model_size_in_billions": 70,
|
|
1263
1263
|
"quantizations": [
|
|
1264
|
-
"8-bit"
|
|
1264
|
+
"8bit"
|
|
1265
1265
|
],
|
|
1266
1266
|
"model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
|
|
1267
1267
|
},
|
|
@@ -1399,6 +1399,98 @@
|
|
|
1399
1399
|
}
|
|
1400
1400
|
]
|
|
1401
1401
|
},
|
|
1402
|
+
{
|
|
1403
|
+
"version": 1,
|
|
1404
|
+
"context_length": 131072,
|
|
1405
|
+
"model_name": "llama-3.3-instruct",
|
|
1406
|
+
"model_lang": [
|
|
1407
|
+
"en",
|
|
1408
|
+
"de",
|
|
1409
|
+
"fr",
|
|
1410
|
+
"it",
|
|
1411
|
+
"pt",
|
|
1412
|
+
"hi",
|
|
1413
|
+
"es",
|
|
1414
|
+
"th"
|
|
1415
|
+
],
|
|
1416
|
+
"model_ability": [
|
|
1417
|
+
"chat",
|
|
1418
|
+
"tools"
|
|
1419
|
+
],
|
|
1420
|
+
"model_description": "The Llama 3.3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
|
|
1421
|
+
"model_specs": [
|
|
1422
|
+
{
|
|
1423
|
+
"model_format": "pytorch",
|
|
1424
|
+
"model_size_in_billions": 70,
|
|
1425
|
+
"quantizations": [
|
|
1426
|
+
"none"
|
|
1427
|
+
],
|
|
1428
|
+
"model_id": "meta-llama/Llama-3.3-70B-Instruct"
|
|
1429
|
+
},
|
|
1430
|
+
{
|
|
1431
|
+
"model_format": "gptq",
|
|
1432
|
+
"model_size_in_billions": 70,
|
|
1433
|
+
"quantizations": [
|
|
1434
|
+
"Int4"
|
|
1435
|
+
],
|
|
1436
|
+
"model_id": "shuyuej/Llama-3.3-70B-Instruct-GPTQ"
|
|
1437
|
+
},
|
|
1438
|
+
{
|
|
1439
|
+
"model_format": "awq",
|
|
1440
|
+
"model_size_in_billions": 70,
|
|
1441
|
+
"quantizations": [
|
|
1442
|
+
"Int4"
|
|
1443
|
+
],
|
|
1444
|
+
"model_id": "casperhansen/llama-3.3-70b-instruct-awq"
|
|
1445
|
+
},
|
|
1446
|
+
{
|
|
1447
|
+
"model_format": "mlx",
|
|
1448
|
+
"model_size_in_billions": 70,
|
|
1449
|
+
"quantizations": [
|
|
1450
|
+
"3bit",
|
|
1451
|
+
"4bit",
|
|
1452
|
+
"6bit",
|
|
1453
|
+
"8bit",
|
|
1454
|
+
"fp16"
|
|
1455
|
+
],
|
|
1456
|
+
"model_id": "mlx-community/Llama-3.3-70B-Instruct-{quantization}"
|
|
1457
|
+
},
|
|
1458
|
+
{
|
|
1459
|
+
"model_format": "ggufv2",
|
|
1460
|
+
"model_size_in_billions": 70,
|
|
1461
|
+
"quantizations": [
|
|
1462
|
+
"Q3_K_L",
|
|
1463
|
+
"Q4_K_M",
|
|
1464
|
+
"Q6_K",
|
|
1465
|
+
"Q8_0"
|
|
1466
|
+
],
|
|
1467
|
+
"quantization_parts": {
|
|
1468
|
+
"Q6_K": [
|
|
1469
|
+
"00001-of-00002",
|
|
1470
|
+
"00002-of-00002"
|
|
1471
|
+
],
|
|
1472
|
+
"Q8_0": [
|
|
1473
|
+
"00001-of-00002",
|
|
1474
|
+
"00002-of-00002"
|
|
1475
|
+
]
|
|
1476
|
+
},
|
|
1477
|
+
"model_id": "lmstudio-community/Llama-3.3-70B-Instruct-GGUF",
|
|
1478
|
+
"model_file_name_template": "Llama-3.3-70B-Instruct-{quantization}.gguf",
|
|
1479
|
+
"model_file_name_split_template": "Llama-3.3-70B-Instruct-{quantization}-{part}.gguf"
|
|
1480
|
+
}
|
|
1481
|
+
],
|
|
1482
|
+
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
|
|
1483
|
+
"stop_token_ids": [
|
|
1484
|
+
128001,
|
|
1485
|
+
128008,
|
|
1486
|
+
128009
|
|
1487
|
+
],
|
|
1488
|
+
"stop": [
|
|
1489
|
+
"<|end_of_text|>",
|
|
1490
|
+
"<|eot_id|>",
|
|
1491
|
+
"<|eom_id|>"
|
|
1492
|
+
]
|
|
1493
|
+
},
|
|
1402
1494
|
{
|
|
1403
1495
|
"version": 1,
|
|
1404
1496
|
"context_length": 2048,
|
|
@@ -2199,7 +2291,7 @@
|
|
|
2199
2291
|
"model_format": "mlx",
|
|
2200
2292
|
"model_size_in_billions": "0_5",
|
|
2201
2293
|
"quantizations": [
|
|
2202
|
-
"
|
|
2294
|
+
"4bit"
|
|
2203
2295
|
],
|
|
2204
2296
|
"model_id": "Qwen/Qwen2-0.5B-Instruct-MLX"
|
|
2205
2297
|
},
|
|
@@ -2207,7 +2299,7 @@
|
|
|
2207
2299
|
"model_format": "mlx",
|
|
2208
2300
|
"model_size_in_billions": "1_5",
|
|
2209
2301
|
"quantizations": [
|
|
2210
|
-
"
|
|
2302
|
+
"4bit"
|
|
2211
2303
|
],
|
|
2212
2304
|
"model_id": "Qwen/Qwen2-1.5B-Instruct-MLX"
|
|
2213
2305
|
},
|
|
@@ -2215,7 +2307,7 @@
|
|
|
2215
2307
|
"model_format": "mlx",
|
|
2216
2308
|
"model_size_in_billions": 7,
|
|
2217
2309
|
"quantizations": [
|
|
2218
|
-
"
|
|
2310
|
+
"4bit"
|
|
2219
2311
|
],
|
|
2220
2312
|
"model_id": "Qwen/Qwen2-7B-Instruct-MLX"
|
|
2221
2313
|
},
|
|
@@ -2223,7 +2315,7 @@
|
|
|
2223
2315
|
"model_format": "mlx",
|
|
2224
2316
|
"model_size_in_billions": 72,
|
|
2225
2317
|
"quantizations": [
|
|
2226
|
-
"
|
|
2318
|
+
"4bit"
|
|
2227
2319
|
],
|
|
2228
2320
|
"model_id": "mlx-community/Qwen2-72B-Instruct-4bit"
|
|
2229
2321
|
},
|
|
@@ -3222,7 +3314,7 @@
|
|
|
3222
3314
|
"model_format": "mlx",
|
|
3223
3315
|
"model_size_in_billions": 12,
|
|
3224
3316
|
"quantizations": [
|
|
3225
|
-
"
|
|
3317
|
+
"4bit"
|
|
3226
3318
|
],
|
|
3227
3319
|
"model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
|
|
3228
3320
|
},
|
|
@@ -3230,7 +3322,7 @@
|
|
|
3230
3322
|
"model_format": "mlx",
|
|
3231
3323
|
"model_size_in_billions": 12,
|
|
3232
3324
|
"quantizations": [
|
|
3233
|
-
"
|
|
3325
|
+
"8bit"
|
|
3234
3326
|
],
|
|
3235
3327
|
"model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
|
|
3236
3328
|
}
|
|
@@ -3370,7 +3462,7 @@
|
|
|
3370
3462
|
"model_format": "mlx",
|
|
3371
3463
|
"model_size_in_billions": 123,
|
|
3372
3464
|
"quantizations": [
|
|
3373
|
-
"
|
|
3465
|
+
"4bit"
|
|
3374
3466
|
],
|
|
3375
3467
|
"model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
|
|
3376
3468
|
},
|
|
@@ -3378,7 +3470,7 @@
|
|
|
3378
3470
|
"model_format": "mlx",
|
|
3379
3471
|
"model_size_in_billions": 123,
|
|
3380
3472
|
"quantizations": [
|
|
3381
|
-
"
|
|
3473
|
+
"8bit"
|
|
3382
3474
|
],
|
|
3383
3475
|
"model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
|
|
3384
3476
|
}
|
|
@@ -3411,8 +3503,8 @@
|
|
|
3411
3503
|
"8-bit",
|
|
3412
3504
|
"none"
|
|
3413
3505
|
],
|
|
3414
|
-
"model_id": "mistralai/
|
|
3415
|
-
"model_revision": "
|
|
3506
|
+
"model_id": "mistralai/Codestral-22B-v0.1",
|
|
3507
|
+
"model_revision": "8f5fe23af91885222a1563283c87416745a5e212"
|
|
3416
3508
|
},
|
|
3417
3509
|
{
|
|
3418
3510
|
"model_format": "ggufv2",
|
|
@@ -3436,7 +3528,7 @@
|
|
|
3436
3528
|
"model_format": "mlx",
|
|
3437
3529
|
"model_size_in_billions": 22,
|
|
3438
3530
|
"quantizations": [
|
|
3439
|
-
"
|
|
3531
|
+
"4bit"
|
|
3440
3532
|
],
|
|
3441
3533
|
"model_id": "mlx-community/Codestral-22B-v0.1-4bit",
|
|
3442
3534
|
"model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
|
|
@@ -3445,7 +3537,7 @@
|
|
|
3445
3537
|
"model_format": "mlx",
|
|
3446
3538
|
"model_size_in_billions": 22,
|
|
3447
3539
|
"quantizations": [
|
|
3448
|
-
"
|
|
3540
|
+
"8bit"
|
|
3449
3541
|
],
|
|
3450
3542
|
"model_id": "mlx-community/Codestral-22B-v0.1-8bit",
|
|
3451
3543
|
"model_revision": "0399a53970663950d57010e61a2796af524a1588"
|
|
@@ -4170,7 +4262,7 @@
|
|
|
4170
4262
|
"model_format": "mlx",
|
|
4171
4263
|
"model_size_in_billions": 6,
|
|
4172
4264
|
"quantizations": [
|
|
4173
|
-
"
|
|
4265
|
+
"4bit"
|
|
4174
4266
|
],
|
|
4175
4267
|
"model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
|
|
4176
4268
|
"model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
|
|
@@ -4179,7 +4271,7 @@
|
|
|
4179
4271
|
"model_format": "mlx",
|
|
4180
4272
|
"model_size_in_billions": 6,
|
|
4181
4273
|
"quantizations": [
|
|
4182
|
-
"
|
|
4274
|
+
"8bit"
|
|
4183
4275
|
],
|
|
4184
4276
|
"model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
|
|
4185
4277
|
"model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
|
|
@@ -4188,7 +4280,7 @@
|
|
|
4188
4280
|
"model_format": "mlx",
|
|
4189
4281
|
"model_size_in_billions": 9,
|
|
4190
4282
|
"quantizations": [
|
|
4191
|
-
"
|
|
4283
|
+
"4bit"
|
|
4192
4284
|
],
|
|
4193
4285
|
"model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
|
|
4194
4286
|
"model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
|
|
@@ -4197,7 +4289,7 @@
|
|
|
4197
4289
|
"model_format": "mlx",
|
|
4198
4290
|
"model_size_in_billions": 9,
|
|
4199
4291
|
"quantizations": [
|
|
4200
|
-
"
|
|
4292
|
+
"8bit"
|
|
4201
4293
|
],
|
|
4202
4294
|
"model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
|
|
4203
4295
|
"model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
|
|
@@ -4206,7 +4298,7 @@
|
|
|
4206
4298
|
"model_format": "mlx",
|
|
4207
4299
|
"model_size_in_billions": 34,
|
|
4208
4300
|
"quantizations": [
|
|
4209
|
-
"
|
|
4301
|
+
"4bit"
|
|
4210
4302
|
],
|
|
4211
4303
|
"model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
|
|
4212
4304
|
"model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
|
|
@@ -4215,7 +4307,7 @@
|
|
|
4215
4307
|
"model_format": "mlx",
|
|
4216
4308
|
"model_size_in_billions": 34,
|
|
4217
4309
|
"quantizations": [
|
|
4218
|
-
"
|
|
4310
|
+
"8bit"
|
|
4219
4311
|
],
|
|
4220
4312
|
"model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
|
|
4221
4313
|
"model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
|
|
@@ -5266,7 +5358,7 @@
|
|
|
5266
5358
|
"model_format": "mlx",
|
|
5267
5359
|
"model_size_in_billions": 7,
|
|
5268
5360
|
"quantizations": [
|
|
5269
|
-
"
|
|
5361
|
+
"4bit"
|
|
5270
5362
|
],
|
|
5271
5363
|
"model_id": "mlx-community/internlm2_5-7b-chat-4bit",
|
|
5272
5364
|
"model_revision": "d12097a867721978142a6048399f470a3d18beee"
|
|
@@ -5275,7 +5367,7 @@
|
|
|
5275
5367
|
"model_format": "mlx",
|
|
5276
5368
|
"model_size_in_billions": 7,
|
|
5277
5369
|
"quantizations": [
|
|
5278
|
-
"
|
|
5370
|
+
"8bit"
|
|
5279
5371
|
],
|
|
5280
5372
|
"model_id": "mlx-community/internlm2_5-7b-chat-8bit",
|
|
5281
5373
|
"model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
|
|
@@ -5803,7 +5895,7 @@
|
|
|
5803
5895
|
"model_format": "mlx",
|
|
5804
5896
|
"model_size_in_billions": 2,
|
|
5805
5897
|
"quantizations": [
|
|
5806
|
-
"
|
|
5898
|
+
"4bit"
|
|
5807
5899
|
],
|
|
5808
5900
|
"model_id": "mlx-community/gemma-2-2b-it-4bit"
|
|
5809
5901
|
},
|
|
@@ -5811,7 +5903,7 @@
|
|
|
5811
5903
|
"model_format": "mlx",
|
|
5812
5904
|
"model_size_in_billions": 2,
|
|
5813
5905
|
"quantizations": [
|
|
5814
|
-
"
|
|
5906
|
+
"8bit"
|
|
5815
5907
|
],
|
|
5816
5908
|
"model_id": "mlx-community/gemma-2-2b-it-8bit"
|
|
5817
5909
|
},
|
|
@@ -5827,7 +5919,7 @@
|
|
|
5827
5919
|
"model_format": "mlx",
|
|
5828
5920
|
"model_size_in_billions": 9,
|
|
5829
5921
|
"quantizations": [
|
|
5830
|
-
"
|
|
5922
|
+
"4bit"
|
|
5831
5923
|
],
|
|
5832
5924
|
"model_id": "mlx-community/gemma-2-9b-it-4bit"
|
|
5833
5925
|
},
|
|
@@ -5835,7 +5927,7 @@
|
|
|
5835
5927
|
"model_format": "mlx",
|
|
5836
5928
|
"model_size_in_billions": 9,
|
|
5837
5929
|
"quantizations": [
|
|
5838
|
-
"
|
|
5930
|
+
"8bit"
|
|
5839
5931
|
],
|
|
5840
5932
|
"model_id": "mlx-community/gemma-2-9b-it-8bit"
|
|
5841
5933
|
},
|
|
@@ -5851,7 +5943,7 @@
|
|
|
5851
5943
|
"model_format": "mlx",
|
|
5852
5944
|
"model_size_in_billions": 27,
|
|
5853
5945
|
"quantizations": [
|
|
5854
|
-
"
|
|
5946
|
+
"4bit"
|
|
5855
5947
|
],
|
|
5856
5948
|
"model_id": "mlx-community/gemma-2-27b-it-4bit"
|
|
5857
5949
|
},
|
|
@@ -5859,7 +5951,7 @@
|
|
|
5859
5951
|
"model_format": "mlx",
|
|
5860
5952
|
"model_size_in_billions": 27,
|
|
5861
5953
|
"quantizations": [
|
|
5862
|
-
"
|
|
5954
|
+
"8bit"
|
|
5863
5955
|
],
|
|
5864
5956
|
"model_id": "mlx-community/gemma-2-27b-it-8bit"
|
|
5865
5957
|
},
|
|
@@ -6925,7 +7017,7 @@
|
|
|
6925
7017
|
"model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
|
|
6926
7018
|
"model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
|
|
6927
7019
|
},
|
|
6928
|
-
|
|
7020
|
+
{
|
|
6929
7021
|
"model_format":"awq",
|
|
6930
7022
|
"model_size_in_billions":2,
|
|
6931
7023
|
"quantizations":[
|
|
@@ -6934,6 +7026,15 @@
|
|
|
6934
7026
|
"model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
6935
7027
|
"model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
|
|
6936
7028
|
},
|
|
7029
|
+
{
|
|
7030
|
+
"model_format":"mlx",
|
|
7031
|
+
"model_size_in_billions":2,
|
|
7032
|
+
"quantizations":[
|
|
7033
|
+
"4bit",
|
|
7034
|
+
"8bit"
|
|
7035
|
+
],
|
|
7036
|
+
"model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}"
|
|
7037
|
+
},
|
|
6937
7038
|
{
|
|
6938
7039
|
"model_format":"pytorch",
|
|
6939
7040
|
"model_size_in_billions":7,
|
|
@@ -6970,6 +7071,15 @@
|
|
|
6970
7071
|
"model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
6971
7072
|
"model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
|
|
6972
7073
|
},
|
|
7074
|
+
{
|
|
7075
|
+
"model_format":"mlx",
|
|
7076
|
+
"model_size_in_billions":7,
|
|
7077
|
+
"quantizations":[
|
|
7078
|
+
"4bit",
|
|
7079
|
+
"8bit"
|
|
7080
|
+
],
|
|
7081
|
+
"model_id":"mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
|
|
7082
|
+
},
|
|
6973
7083
|
{
|
|
6974
7084
|
"model_format":"pytorch",
|
|
6975
7085
|
"model_size_in_billions":72,
|
|
@@ -6994,6 +7104,15 @@
|
|
|
6994
7104
|
"Int8"
|
|
6995
7105
|
],
|
|
6996
7106
|
"model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
|
|
7107
|
+
},
|
|
7108
|
+
{
|
|
7109
|
+
"model_format":"mlx",
|
|
7110
|
+
"model_size_in_billions":72,
|
|
7111
|
+
"quantizations":[
|
|
7112
|
+
"4bit",
|
|
7113
|
+
"8bit"
|
|
7114
|
+
],
|
|
7115
|
+
"model_id":"mlx-community/Qwen2-VL-72B-Instruct-{quantization}"
|
|
6997
7116
|
}
|
|
6998
7117
|
],
|
|
6999
7118
|
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
@@ -8015,7 +8134,7 @@
|
|
|
8015
8134
|
"model_format": "mlx",
|
|
8016
8135
|
"model_size_in_billions": "0_5",
|
|
8017
8136
|
"quantizations": [
|
|
8018
|
-
"
|
|
8137
|
+
"4bit"
|
|
8019
8138
|
],
|
|
8020
8139
|
"model_id": "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
|
|
8021
8140
|
},
|
|
@@ -8023,7 +8142,7 @@
|
|
|
8023
8142
|
"model_format": "mlx",
|
|
8024
8143
|
"model_size_in_billions": "0_5",
|
|
8025
8144
|
"quantizations": [
|
|
8026
|
-
"
|
|
8145
|
+
"8bit"
|
|
8027
8146
|
],
|
|
8028
8147
|
"model_id": "mlx-community/Qwen2.5-0.5B-Instruct-8bit"
|
|
8029
8148
|
},
|
|
@@ -8039,7 +8158,7 @@
|
|
|
8039
8158
|
"model_format": "mlx",
|
|
8040
8159
|
"model_size_in_billions": "1_5",
|
|
8041
8160
|
"quantizations": [
|
|
8042
|
-
"
|
|
8161
|
+
"4bit"
|
|
8043
8162
|
],
|
|
8044
8163
|
"model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit"
|
|
8045
8164
|
},
|
|
@@ -8047,7 +8166,7 @@
|
|
|
8047
8166
|
"model_format": "mlx",
|
|
8048
8167
|
"model_size_in_billions": "1_5",
|
|
8049
8168
|
"quantizations": [
|
|
8050
|
-
"
|
|
8169
|
+
"8bit"
|
|
8051
8170
|
],
|
|
8052
8171
|
"model_id": "mlx-community/Qwen2.5-1.5B-Instruct-8bit"
|
|
8053
8172
|
},
|
|
@@ -8063,7 +8182,7 @@
|
|
|
8063
8182
|
"model_format": "mlx",
|
|
8064
8183
|
"model_size_in_billions": 3,
|
|
8065
8184
|
"quantizations": [
|
|
8066
|
-
"
|
|
8185
|
+
"4bit"
|
|
8067
8186
|
],
|
|
8068
8187
|
"model_id": "mlx-community/Qwen2.5-3B-Instruct-4bit"
|
|
8069
8188
|
},
|
|
@@ -8071,7 +8190,7 @@
|
|
|
8071
8190
|
"model_format": "mlx",
|
|
8072
8191
|
"model_size_in_billions": 3,
|
|
8073
8192
|
"quantizations": [
|
|
8074
|
-
"
|
|
8193
|
+
"8bit"
|
|
8075
8194
|
],
|
|
8076
8195
|
"model_id": "mlx-community/Qwen2.5-3B-Instruct-8bit"
|
|
8077
8196
|
},
|
|
@@ -8087,7 +8206,7 @@
|
|
|
8087
8206
|
"model_format": "mlx",
|
|
8088
8207
|
"model_size_in_billions": 7,
|
|
8089
8208
|
"quantizations": [
|
|
8090
|
-
"
|
|
8209
|
+
"4bit"
|
|
8091
8210
|
],
|
|
8092
8211
|
"model_id": "mlx-community/Qwen2.5-7B-Instruct-4bit"
|
|
8093
8212
|
},
|
|
@@ -8095,7 +8214,7 @@
|
|
|
8095
8214
|
"model_format": "mlx",
|
|
8096
8215
|
"model_size_in_billions": 7,
|
|
8097
8216
|
"quantizations": [
|
|
8098
|
-
"
|
|
8217
|
+
"8bit"
|
|
8099
8218
|
],
|
|
8100
8219
|
"model_id": "mlx-community/Qwen2.5-7B-Instruct-8bit"
|
|
8101
8220
|
},
|
|
@@ -8111,7 +8230,7 @@
|
|
|
8111
8230
|
"model_format": "mlx",
|
|
8112
8231
|
"model_size_in_billions": 14,
|
|
8113
8232
|
"quantizations": [
|
|
8114
|
-
"
|
|
8233
|
+
"4bit"
|
|
8115
8234
|
],
|
|
8116
8235
|
"model_id": "mlx-community/Qwen2.5-14B-Instruct-4bit"
|
|
8117
8236
|
},
|
|
@@ -8119,7 +8238,7 @@
|
|
|
8119
8238
|
"model_format": "mlx",
|
|
8120
8239
|
"model_size_in_billions": 14,
|
|
8121
8240
|
"quantizations": [
|
|
8122
|
-
"
|
|
8241
|
+
"8bit"
|
|
8123
8242
|
],
|
|
8124
8243
|
"model_id": "mlx-community/Qwen2.5-14B-Instruct-8bit"
|
|
8125
8244
|
},
|
|
@@ -8135,7 +8254,7 @@
|
|
|
8135
8254
|
"model_format": "mlx",
|
|
8136
8255
|
"model_size_in_billions": 32,
|
|
8137
8256
|
"quantizations": [
|
|
8138
|
-
"
|
|
8257
|
+
"4bit"
|
|
8139
8258
|
],
|
|
8140
8259
|
"model_id": "mlx-community/Qwen2.5-32B-Instruct-4bit"
|
|
8141
8260
|
},
|
|
@@ -8143,7 +8262,7 @@
|
|
|
8143
8262
|
"model_format": "mlx",
|
|
8144
8263
|
"model_size_in_billions": 32,
|
|
8145
8264
|
"quantizations": [
|
|
8146
|
-
"
|
|
8265
|
+
"8bit"
|
|
8147
8266
|
],
|
|
8148
8267
|
"model_id": "mlx-community/Qwen2.5-32B-Instruct-8bit"
|
|
8149
8268
|
},
|
|
@@ -8159,7 +8278,7 @@
|
|
|
8159
8278
|
"model_format": "mlx",
|
|
8160
8279
|
"model_size_in_billions": 72,
|
|
8161
8280
|
"quantizations": [
|
|
8162
|
-
"
|
|
8281
|
+
"4bit"
|
|
8163
8282
|
],
|
|
8164
8283
|
"model_id": "mlx-community/Qwen2.5-72B-Instruct-4bit"
|
|
8165
8284
|
},
|
|
@@ -8167,7 +8286,7 @@
|
|
|
8167
8286
|
"model_format": "mlx",
|
|
8168
8287
|
"model_size_in_billions": 72,
|
|
8169
8288
|
"quantizations": [
|
|
8170
|
-
"
|
|
8289
|
+
"8bit"
|
|
8171
8290
|
],
|
|
8172
8291
|
"model_id": "mlx-community/Qwen2.5-72B-Instruct-8bit"
|
|
8173
8292
|
},
|
|
@@ -8205,6 +8324,16 @@
|
|
|
8205
8324
|
],
|
|
8206
8325
|
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
|
|
8207
8326
|
"model_specs": [
|
|
8327
|
+
{
|
|
8328
|
+
"model_format": "pytorch",
|
|
8329
|
+
"model_size_in_billions": "0_5",
|
|
8330
|
+
"quantizations": [
|
|
8331
|
+
"4-bit",
|
|
8332
|
+
"8-bit",
|
|
8333
|
+
"none"
|
|
8334
|
+
],
|
|
8335
|
+
"model_id": "Qwen/Qwen2.5-Coder-0.5B"
|
|
8336
|
+
},
|
|
8208
8337
|
{
|
|
8209
8338
|
"model_format": "pytorch",
|
|
8210
8339
|
"model_size_in_billions": "1_5",
|
|
@@ -8213,8 +8342,17 @@
|
|
|
8213
8342
|
"8-bit",
|
|
8214
8343
|
"none"
|
|
8215
8344
|
],
|
|
8216
|
-
"model_id": "Qwen/Qwen2.5-Coder-1.5B"
|
|
8217
|
-
|
|
8345
|
+
"model_id": "Qwen/Qwen2.5-Coder-1.5B"
|
|
8346
|
+
},
|
|
8347
|
+
{
|
|
8348
|
+
"model_format": "pytorch",
|
|
8349
|
+
"model_size_in_billions": "3",
|
|
8350
|
+
"quantizations": [
|
|
8351
|
+
"4-bit",
|
|
8352
|
+
"8-bit",
|
|
8353
|
+
"none"
|
|
8354
|
+
],
|
|
8355
|
+
"model_id": "Qwen/Qwen2.5-Coder-3B"
|
|
8218
8356
|
},
|
|
8219
8357
|
{
|
|
8220
8358
|
"model_format": "pytorch",
|
|
@@ -8224,8 +8362,27 @@
|
|
|
8224
8362
|
"8-bit",
|
|
8225
8363
|
"none"
|
|
8226
8364
|
],
|
|
8227
|
-
"model_id": "Qwen/Qwen2.5-Coder-7B"
|
|
8228
|
-
|
|
8365
|
+
"model_id": "Qwen/Qwen2.5-Coder-7B"
|
|
8366
|
+
},
|
|
8367
|
+
{
|
|
8368
|
+
"model_format": "pytorch",
|
|
8369
|
+
"model_size_in_billions": 14,
|
|
8370
|
+
"quantizations": [
|
|
8371
|
+
"4-bit",
|
|
8372
|
+
"8-bit",
|
|
8373
|
+
"none"
|
|
8374
|
+
],
|
|
8375
|
+
"model_id": "Qwen/Qwen2.5-Coder-14B"
|
|
8376
|
+
},
|
|
8377
|
+
{
|
|
8378
|
+
"model_format": "pytorch",
|
|
8379
|
+
"model_size_in_billions": 32,
|
|
8380
|
+
"quantizations": [
|
|
8381
|
+
"4-bit",
|
|
8382
|
+
"8-bit",
|
|
8383
|
+
"none"
|
|
8384
|
+
],
|
|
8385
|
+
"model_id": "Qwen/Qwen2.5-Coder-32B"
|
|
8229
8386
|
}
|
|
8230
8387
|
]
|
|
8231
8388
|
},
|
|
@@ -8243,6 +8400,16 @@
|
|
|
8243
8400
|
],
|
|
8244
8401
|
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
|
|
8245
8402
|
"model_specs": [
|
|
8403
|
+
{
|
|
8404
|
+
"model_format": "pytorch",
|
|
8405
|
+
"model_size_in_billions": "0_5",
|
|
8406
|
+
"quantizations": [
|
|
8407
|
+
"4-bit",
|
|
8408
|
+
"8-bit",
|
|
8409
|
+
"none"
|
|
8410
|
+
],
|
|
8411
|
+
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
|
|
8412
|
+
},
|
|
8246
8413
|
{
|
|
8247
8414
|
"model_format": "pytorch",
|
|
8248
8415
|
"model_size_in_billions": "1_5",
|
|
@@ -8253,6 +8420,16 @@
|
|
|
8253
8420
|
],
|
|
8254
8421
|
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
|
|
8255
8422
|
},
|
|
8423
|
+
{
|
|
8424
|
+
"model_format": "pytorch",
|
|
8425
|
+
"model_size_in_billions": "3",
|
|
8426
|
+
"quantizations": [
|
|
8427
|
+
"4-bit",
|
|
8428
|
+
"8-bit",
|
|
8429
|
+
"none"
|
|
8430
|
+
],
|
|
8431
|
+
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
|
|
8432
|
+
},
|
|
8256
8433
|
{
|
|
8257
8434
|
"model_format": "pytorch",
|
|
8258
8435
|
"model_size_in_billions": 7,
|
|
@@ -8263,57 +8440,171 @@
|
|
|
8263
8440
|
],
|
|
8264
8441
|
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
|
|
8265
8442
|
},
|
|
8443
|
+
{
|
|
8444
|
+
"model_format": "pytorch",
|
|
8445
|
+
"model_size_in_billions": 14,
|
|
8446
|
+
"quantizations": [
|
|
8447
|
+
"4-bit",
|
|
8448
|
+
"8-bit",
|
|
8449
|
+
"none"
|
|
8450
|
+
],
|
|
8451
|
+
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
|
|
8452
|
+
},
|
|
8453
|
+
{
|
|
8454
|
+
"model_format": "pytorch",
|
|
8455
|
+
"model_size_in_billions": 32,
|
|
8456
|
+
"quantizations": [
|
|
8457
|
+
"4-bit",
|
|
8458
|
+
"8-bit",
|
|
8459
|
+
"none"
|
|
8460
|
+
],
|
|
8461
|
+
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
|
|
8462
|
+
},
|
|
8266
8463
|
{
|
|
8267
8464
|
"model_format": "gptq",
|
|
8268
|
-
"model_size_in_billions": "
|
|
8465
|
+
"model_size_in_billions": "0_5",
|
|
8269
8466
|
"quantizations": [
|
|
8270
8467
|
"Int4",
|
|
8271
8468
|
"Int8"
|
|
8272
8469
|
],
|
|
8273
|
-
"model_id": "Qwen/Qwen2.5-Coder-
|
|
8470
|
+
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
|
|
8274
8471
|
},
|
|
8275
8472
|
{
|
|
8276
|
-
"model_format": "
|
|
8473
|
+
"model_format": "gptq",
|
|
8277
8474
|
"model_size_in_billions": "1_5",
|
|
8278
8475
|
"quantizations": [
|
|
8279
|
-
|
|
8280
|
-
|
|
8281
|
-
"q4_0",
|
|
8282
|
-
"q4_k_m",
|
|
8283
|
-
"q5_0",
|
|
8284
|
-
"q5_k_m",
|
|
8285
|
-
"q6_k",
|
|
8286
|
-
"q8_0"
|
|
8476
|
+
"Int4",
|
|
8477
|
+
"Int8"
|
|
8287
8478
|
],
|
|
8288
|
-
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-
|
|
8289
|
-
"model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
|
|
8479
|
+
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
|
|
8290
8480
|
},
|
|
8291
8481
|
{
|
|
8292
|
-
"model_format": "
|
|
8293
|
-
"model_size_in_billions":
|
|
8482
|
+
"model_format": "gptq",
|
|
8483
|
+
"model_size_in_billions": "3",
|
|
8294
8484
|
"quantizations": [
|
|
8295
|
-
|
|
8296
|
-
|
|
8297
|
-
"q4_0",
|
|
8298
|
-
"q4_k_m",
|
|
8299
|
-
"q5_0",
|
|
8300
|
-
"q5_k_m",
|
|
8301
|
-
"q6_k",
|
|
8302
|
-
"q8_0"
|
|
8485
|
+
"Int4",
|
|
8486
|
+
"Int8"
|
|
8303
8487
|
],
|
|
8304
|
-
"model_id": "Qwen/Qwen2.5-Coder-
|
|
8305
|
-
|
|
8306
|
-
|
|
8307
|
-
"
|
|
8308
|
-
|
|
8309
|
-
|
|
8310
|
-
"
|
|
8311
|
-
|
|
8312
|
-
|
|
8313
|
-
|
|
8314
|
-
|
|
8315
|
-
|
|
8316
|
-
|
|
8488
|
+
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
|
|
8489
|
+
},
|
|
8490
|
+
{
|
|
8491
|
+
"model_format": "gptq",
|
|
8492
|
+
"model_size_in_billions": "7",
|
|
8493
|
+
"quantizations": [
|
|
8494
|
+
"Int4",
|
|
8495
|
+
"Int8"
|
|
8496
|
+
],
|
|
8497
|
+
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
|
|
8498
|
+
},
|
|
8499
|
+
{
|
|
8500
|
+
"model_format": "gptq",
|
|
8501
|
+
"model_size_in_billions": "14",
|
|
8502
|
+
"quantizations": [
|
|
8503
|
+
"Int4",
|
|
8504
|
+
"Int8"
|
|
8505
|
+
],
|
|
8506
|
+
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
|
|
8507
|
+
},
|
|
8508
|
+
{
|
|
8509
|
+
"model_format": "gptq",
|
|
8510
|
+
"model_size_in_billions": "32",
|
|
8511
|
+
"quantizations": [
|
|
8512
|
+
"Int4",
|
|
8513
|
+
"Int8"
|
|
8514
|
+
],
|
|
8515
|
+
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
|
|
8516
|
+
},
|
|
8517
|
+
{
|
|
8518
|
+
"model_format": "awq",
|
|
8519
|
+
"model_size_in_billions": "0_5",
|
|
8520
|
+
"quantizations": [
|
|
8521
|
+
"Int4"
|
|
8522
|
+
],
|
|
8523
|
+
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
|
|
8524
|
+
},
|
|
8525
|
+
{
|
|
8526
|
+
"model_format": "awq",
|
|
8527
|
+
"model_size_in_billions": "1_5",
|
|
8528
|
+
"quantizations": [
|
|
8529
|
+
"Int4"
|
|
8530
|
+
],
|
|
8531
|
+
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
|
|
8532
|
+
},
|
|
8533
|
+
{
|
|
8534
|
+
"model_format": "awq",
|
|
8535
|
+
"model_size_in_billions": "3",
|
|
8536
|
+
"quantizations": [
|
|
8537
|
+
"Int4"
|
|
8538
|
+
],
|
|
8539
|
+
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
|
|
8540
|
+
},
|
|
8541
|
+
{
|
|
8542
|
+
"model_format": "awq",
|
|
8543
|
+
"model_size_in_billions": "7",
|
|
8544
|
+
"quantizations": [
|
|
8545
|
+
"Int4"
|
|
8546
|
+
],
|
|
8547
|
+
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
|
|
8548
|
+
},
|
|
8549
|
+
{
|
|
8550
|
+
"model_format": "awq",
|
|
8551
|
+
"model_size_in_billions": "14",
|
|
8552
|
+
"quantizations": [
|
|
8553
|
+
"Int4"
|
|
8554
|
+
],
|
|
8555
|
+
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
|
|
8556
|
+
},
|
|
8557
|
+
{
|
|
8558
|
+
"model_format": "awq",
|
|
8559
|
+
"model_size_in_billions": "32",
|
|
8560
|
+
"quantizations": [
|
|
8561
|
+
"Int4"
|
|
8562
|
+
],
|
|
8563
|
+
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
|
|
8564
|
+
},
|
|
8565
|
+
|
|
8566
|
+
{
|
|
8567
|
+
"model_format": "ggufv2",
|
|
8568
|
+
"model_size_in_billions": "1_5",
|
|
8569
|
+
"quantizations": [
|
|
8570
|
+
"q2_k",
|
|
8571
|
+
"q3_k_m",
|
|
8572
|
+
"q4_0",
|
|
8573
|
+
"q4_k_m",
|
|
8574
|
+
"q5_0",
|
|
8575
|
+
"q5_k_m",
|
|
8576
|
+
"q6_k",
|
|
8577
|
+
"q8_0"
|
|
8578
|
+
],
|
|
8579
|
+
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
|
|
8580
|
+
"model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
|
|
8581
|
+
},
|
|
8582
|
+
{
|
|
8583
|
+
"model_format": "ggufv2",
|
|
8584
|
+
"model_size_in_billions": 7,
|
|
8585
|
+
"quantizations": [
|
|
8586
|
+
"q2_k",
|
|
8587
|
+
"q3_k_m",
|
|
8588
|
+
"q4_0",
|
|
8589
|
+
"q4_k_m",
|
|
8590
|
+
"q5_0",
|
|
8591
|
+
"q5_k_m",
|
|
8592
|
+
"q6_k",
|
|
8593
|
+
"q8_0"
|
|
8594
|
+
],
|
|
8595
|
+
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
|
|
8596
|
+
"model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
|
|
8597
|
+
"model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
|
|
8598
|
+
"quantization_parts": {
|
|
8599
|
+
"q4_0": [
|
|
8600
|
+
"00001-of-00002",
|
|
8601
|
+
"00002-of-00002"
|
|
8602
|
+
],
|
|
8603
|
+
"q4_k_m": [
|
|
8604
|
+
"00001-of-00002",
|
|
8605
|
+
"00002-of-00002"
|
|
8606
|
+
],
|
|
8607
|
+
"q5_0": [
|
|
8317
8608
|
"00001-of-00002",
|
|
8318
8609
|
"00002-of-00002"
|
|
8319
8610
|
],
|
|
@@ -8344,5 +8635,676 @@
|
|
|
8344
8635
|
"<|im_start|>",
|
|
8345
8636
|
"<|im_end|>"
|
|
8346
8637
|
]
|
|
8638
|
+
},
|
|
8639
|
+
{
|
|
8640
|
+
"version": 1,
|
|
8641
|
+
"context_length": 32768,
|
|
8642
|
+
"model_name": "QwQ-32B-Preview",
|
|
8643
|
+
"model_lang": [
|
|
8644
|
+
"en",
|
|
8645
|
+
"zh"
|
|
8646
|
+
],
|
|
8647
|
+
"model_ability": [
|
|
8648
|
+
"chat"
|
|
8649
|
+
],
|
|
8650
|
+
"model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
|
|
8651
|
+
"model_specs": [
|
|
8652
|
+
{
|
|
8653
|
+
"model_format": "pytorch",
|
|
8654
|
+
"model_size_in_billions": 32,
|
|
8655
|
+
"quantizations": [
|
|
8656
|
+
"4-bit",
|
|
8657
|
+
"8-bit",
|
|
8658
|
+
"none"
|
|
8659
|
+
],
|
|
8660
|
+
"model_id": "Qwen/QwQ-32B-Preview"
|
|
8661
|
+
},
|
|
8662
|
+
{
|
|
8663
|
+
"model_format": "awq",
|
|
8664
|
+
"model_size_in_billions": 32,
|
|
8665
|
+
"quantizations": [
|
|
8666
|
+
"Int4"
|
|
8667
|
+
],
|
|
8668
|
+
"model_id": "KirillR/QwQ-32B-Preview-AWQ"
|
|
8669
|
+
},
|
|
8670
|
+
{
|
|
8671
|
+
"model_format": "ggufv2",
|
|
8672
|
+
"model_size_in_billions": 32,
|
|
8673
|
+
"quantizations": [
|
|
8674
|
+
"Q3_K_L",
|
|
8675
|
+
"Q4_K_M",
|
|
8676
|
+
"Q6_K",
|
|
8677
|
+
"Q8_0"
|
|
8678
|
+
],
|
|
8679
|
+
"model_id": "lmstudio-community/QwQ-32B-Preview-GGUF",
|
|
8680
|
+
"model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf"
|
|
8681
|
+
},
|
|
8682
|
+
{
|
|
8683
|
+
"model_format": "mlx",
|
|
8684
|
+
"model_size_in_billions": 32,
|
|
8685
|
+
"quantizations": [
|
|
8686
|
+
"4bit"
|
|
8687
|
+
],
|
|
8688
|
+
"model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
|
|
8689
|
+
},
|
|
8690
|
+
{
|
|
8691
|
+
"model_format": "mlx",
|
|
8692
|
+
"model_size_in_billions": 32,
|
|
8693
|
+
"quantizations": [
|
|
8694
|
+
"8bit"
|
|
8695
|
+
],
|
|
8696
|
+
"model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
|
|
8697
|
+
},
|
|
8698
|
+
{
|
|
8699
|
+
"model_format": "mlx",
|
|
8700
|
+
"model_size_in_billions": 32,
|
|
8701
|
+
"quantizations": [
|
|
8702
|
+
"none"
|
|
8703
|
+
],
|
|
8704
|
+
"model_id": "mlx-community/QwQ-32B-Preview-bf16"
|
|
8705
|
+
}
|
|
8706
|
+
],
|
|
8707
|
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
|
8708
|
+
"stop_token_ids": [
|
|
8709
|
+
151643,
|
|
8710
|
+
151644,
|
|
8711
|
+
151645
|
|
8712
|
+
],
|
|
8713
|
+
"stop": [
|
|
8714
|
+
"<|endoftext|>",
|
|
8715
|
+
"<|im_start|>",
|
|
8716
|
+
"<|im_end|>"
|
|
8717
|
+
]
|
|
8718
|
+
},
|
|
8719
|
+
{
|
|
8720
|
+
"version": 1,
|
|
8721
|
+
"context_length": 131072,
|
|
8722
|
+
"model_name": "deepseek-r1-distill-qwen",
|
|
8723
|
+
"model_lang": [
|
|
8724
|
+
"en",
|
|
8725
|
+
"zh"
|
|
8726
|
+
],
|
|
8727
|
+
"model_ability": [
|
|
8728
|
+
"chat"
|
|
8729
|
+
],
|
|
8730
|
+
"model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
|
|
8731
|
+
"model_specs": [
|
|
8732
|
+
{
|
|
8733
|
+
"model_format": "pytorch",
|
|
8734
|
+
"model_size_in_billions": "1_5",
|
|
8735
|
+
"quantizations": [
|
|
8736
|
+
"4-bit",
|
|
8737
|
+
"8-bit",
|
|
8738
|
+
"none"
|
|
8739
|
+
],
|
|
8740
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
|
|
8741
|
+
},
|
|
8742
|
+
{
|
|
8743
|
+
"model_format": "awq",
|
|
8744
|
+
"model_size_in_billions": "1_5",
|
|
8745
|
+
"quantizations": [
|
|
8746
|
+
"Int4"
|
|
8747
|
+
],
|
|
8748
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-1.5b-awq"
|
|
8749
|
+
},
|
|
8750
|
+
{
|
|
8751
|
+
"model_format": "gptq",
|
|
8752
|
+
"model_size_in_billions": "1_5",
|
|
8753
|
+
"quantizations": [
|
|
8754
|
+
"Int4"
|
|
8755
|
+
],
|
|
8756
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4"
|
|
8757
|
+
},
|
|
8758
|
+
{
|
|
8759
|
+
"model_format": "ggufv2",
|
|
8760
|
+
"model_size_in_billions": "1_5",
|
|
8761
|
+
"quantizations": [
|
|
8762
|
+
"Q2_K",
|
|
8763
|
+
"Q2_K_L",
|
|
8764
|
+
"Q3_K_M",
|
|
8765
|
+
"Q4_K_M",
|
|
8766
|
+
"Q5_K_M",
|
|
8767
|
+
"Q6_K",
|
|
8768
|
+
"Q8_0"
|
|
8769
|
+
],
|
|
8770
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
|
|
8771
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf"
|
|
8772
|
+
},
|
|
8773
|
+
{
|
|
8774
|
+
"model_format": "mlx",
|
|
8775
|
+
"model_size_in_billions": "1_5",
|
|
8776
|
+
"quantizations": [
|
|
8777
|
+
"3bit",
|
|
8778
|
+
"4bit",
|
|
8779
|
+
"6bit",
|
|
8780
|
+
"8bit",
|
|
8781
|
+
"bf16"
|
|
8782
|
+
],
|
|
8783
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-1.5B-{quantization}"
|
|
8784
|
+
},
|
|
8785
|
+
{
|
|
8786
|
+
"model_format": "pytorch",
|
|
8787
|
+
"model_size_in_billions": 7,
|
|
8788
|
+
"quantizations": [
|
|
8789
|
+
"4-bit",
|
|
8790
|
+
"8-bit",
|
|
8791
|
+
"none"
|
|
8792
|
+
],
|
|
8793
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
|
|
8794
|
+
},
|
|
8795
|
+
{
|
|
8796
|
+
"model_format": "awq",
|
|
8797
|
+
"model_size_in_billions": 7,
|
|
8798
|
+
"quantizations": [
|
|
8799
|
+
"Int4"
|
|
8800
|
+
],
|
|
8801
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_AWQ"
|
|
8802
|
+
},
|
|
8803
|
+
{
|
|
8804
|
+
"model_format": "gptq",
|
|
8805
|
+
"model_size_in_billions": 7,
|
|
8806
|
+
"quantizations": [
|
|
8807
|
+
"Int4"
|
|
8808
|
+
],
|
|
8809
|
+
"model_id": "jakiAJK/DeepSeek-R1-Distill-Qwen-7B_GPTQ-int4"
|
|
8810
|
+
},
|
|
8811
|
+
{
|
|
8812
|
+
"model_format": "ggufv2",
|
|
8813
|
+
"model_size_in_billions": 7,
|
|
8814
|
+
"quantizations": [
|
|
8815
|
+
"Q2_K",
|
|
8816
|
+
"Q2_K_L",
|
|
8817
|
+
"Q3_K_M",
|
|
8818
|
+
"Q4_K_M",
|
|
8819
|
+
"Q5_K_M",
|
|
8820
|
+
"Q6_K",
|
|
8821
|
+
"Q8_0",
|
|
8822
|
+
"F16"
|
|
8823
|
+
],
|
|
8824
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
|
|
8825
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf"
|
|
8826
|
+
},
|
|
8827
|
+
{
|
|
8828
|
+
"model_format": "mlx",
|
|
8829
|
+
"model_size_in_billions": 7,
|
|
8830
|
+
"quantizations": [
|
|
8831
|
+
"3bit",
|
|
8832
|
+
"4bit",
|
|
8833
|
+
"6bit",
|
|
8834
|
+
"8bit",
|
|
8835
|
+
"bf16"
|
|
8836
|
+
],
|
|
8837
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-7B-{quantization}"
|
|
8838
|
+
},
|
|
8839
|
+
{
|
|
8840
|
+
"model_format": "pytorch",
|
|
8841
|
+
"model_size_in_billions": 14,
|
|
8842
|
+
"quantizations": [
|
|
8843
|
+
"4-bit",
|
|
8844
|
+
"8-bit",
|
|
8845
|
+
"none"
|
|
8846
|
+
],
|
|
8847
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
|
|
8848
|
+
},
|
|
8849
|
+
{
|
|
8850
|
+
"model_format": "awq",
|
|
8851
|
+
"model_size_in_billions": 14,
|
|
8852
|
+
"quantizations": [
|
|
8853
|
+
"Int4"
|
|
8854
|
+
],
|
|
8855
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-14b-awq"
|
|
8856
|
+
},
|
|
8857
|
+
{
|
|
8858
|
+
"model_format": "ggufv2",
|
|
8859
|
+
"model_size_in_billions": 14,
|
|
8860
|
+
"quantizations": [
|
|
8861
|
+
"Q2_K",
|
|
8862
|
+
"Q2_K_L",
|
|
8863
|
+
"Q3_K_M",
|
|
8864
|
+
"Q4_K_M",
|
|
8865
|
+
"Q5_K_M",
|
|
8866
|
+
"Q6_K",
|
|
8867
|
+
"Q8_0",
|
|
8868
|
+
"F16"
|
|
8869
|
+
],
|
|
8870
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
|
|
8871
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf"
|
|
8872
|
+
},
|
|
8873
|
+
{
|
|
8874
|
+
"model_format": "mlx",
|
|
8875
|
+
"model_size_in_billions": 14,
|
|
8876
|
+
"quantizations": [
|
|
8877
|
+
"3bit",
|
|
8878
|
+
"4bit",
|
|
8879
|
+
"6bit",
|
|
8880
|
+
"8bit",
|
|
8881
|
+
"bf16"
|
|
8882
|
+
],
|
|
8883
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-14B-{quantization}"
|
|
8884
|
+
},
|
|
8885
|
+
{
|
|
8886
|
+
"model_format": "pytorch",
|
|
8887
|
+
"model_size_in_billions": 32,
|
|
8888
|
+
"quantizations": [
|
|
8889
|
+
"4-bit",
|
|
8890
|
+
"8-bit",
|
|
8891
|
+
"none"
|
|
8892
|
+
],
|
|
8893
|
+
"model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
|
|
8894
|
+
},
|
|
8895
|
+
{
|
|
8896
|
+
"model_format": "awq",
|
|
8897
|
+
"model_size_in_billions": 32,
|
|
8898
|
+
"quantizations": [
|
|
8899
|
+
"Int4"
|
|
8900
|
+
],
|
|
8901
|
+
"model_id": "casperhansen/deepseek-r1-distill-qwen-32b-awq"
|
|
8902
|
+
},
|
|
8903
|
+
{
|
|
8904
|
+
"model_format": "ggufv2",
|
|
8905
|
+
"model_size_in_billions": 32,
|
|
8906
|
+
"quantizations": [
|
|
8907
|
+
"Q2_K",
|
|
8908
|
+
"Q2_K_L",
|
|
8909
|
+
"Q3_K_M",
|
|
8910
|
+
"Q4_K_M",
|
|
8911
|
+
"Q5_K_M",
|
|
8912
|
+
"Q6_K",
|
|
8913
|
+
"Q8_0",
|
|
8914
|
+
"F16"
|
|
8915
|
+
],
|
|
8916
|
+
"model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
|
|
8917
|
+
"model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
|
|
8918
|
+
},
|
|
8919
|
+
{
|
|
8920
|
+
"model_format": "mlx",
|
|
8921
|
+
"model_size_in_billions": 32,
|
|
8922
|
+
"quantizations": [
|
|
8923
|
+
"3bit",
|
|
8924
|
+
"4bit",
|
|
8925
|
+
"6bit",
|
|
8926
|
+
"8bit",
|
|
8927
|
+
"bf16"
|
|
8928
|
+
],
|
|
8929
|
+
"model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
|
|
8930
|
+
}
|
|
8931
|
+
],
|
|
8932
|
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool 
%}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
|
8933
|
+
"stop_token_ids": [
|
|
8934
|
+
151643
|
|
8935
|
+
],
|
|
8936
|
+
"stop": [
|
|
8937
|
+
"<|end▁of▁sentence|>"
|
|
8938
|
+
]
|
|
8939
|
+
},
|
|
8940
|
+
{
|
|
8941
|
+
"version": 1,
|
|
8942
|
+
"context_length": 8192,
|
|
8943
|
+
"model_name": "glm-edge-chat",
|
|
8944
|
+
"model_lang": [
|
|
8945
|
+
"en",
|
|
8946
|
+
"zh"
|
|
8947
|
+
],
|
|
8948
|
+
"model_ability": [
|
|
8949
|
+
"chat"
|
|
8950
|
+
],
|
|
8951
|
+
"model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
|
|
8952
|
+
"model_specs": [
|
|
8953
|
+
{
|
|
8954
|
+
"model_format": "pytorch",
|
|
8955
|
+
"model_size_in_billions": "1_5",
|
|
8956
|
+
"quantizations": [
|
|
8957
|
+
"4-bit",
|
|
8958
|
+
"8-bit",
|
|
8959
|
+
"none"
|
|
8960
|
+
],
|
|
8961
|
+
"model_id": "THUDM/glm-edge-1.5b-chat"
|
|
8962
|
+
},
|
|
8963
|
+
{
|
|
8964
|
+
"model_format": "pytorch",
|
|
8965
|
+
"model_size_in_billions": "4",
|
|
8966
|
+
"quantizations": [
|
|
8967
|
+
"4-bit",
|
|
8968
|
+
"8-bit",
|
|
8969
|
+
"none"
|
|
8970
|
+
],
|
|
8971
|
+
"model_id": "THUDM/glm-edge-4b-chat"
|
|
8972
|
+
},
|
|
8973
|
+
{
|
|
8974
|
+
"model_format": "ggufv2",
|
|
8975
|
+
"model_size_in_billions": "1_5",
|
|
8976
|
+
"quantizations": [
|
|
8977
|
+
"Q4_0",
|
|
8978
|
+
"Q4_1",
|
|
8979
|
+
"Q4_K",
|
|
8980
|
+
"Q4_K_M",
|
|
8981
|
+
"Q4_K_S",
|
|
8982
|
+
"Q5_0",
|
|
8983
|
+
"Q5_1",
|
|
8984
|
+
"Q5_K",
|
|
8985
|
+
"Q5_K_M",
|
|
8986
|
+
"Q5_K_S",
|
|
8987
|
+
"Q6_K",
|
|
8988
|
+
"Q8_0"
|
|
8989
|
+
],
|
|
8990
|
+
"model_file_name_template": "ggml-model-{quantization}.gguf",
|
|
8991
|
+
"model_id": "THUDM/glm-edge-1.5b-chat-gguf"
|
|
8992
|
+
},
|
|
8993
|
+
{
|
|
8994
|
+
"model_format": "ggufv2",
|
|
8995
|
+
"model_size_in_billions": "1_5",
|
|
8996
|
+
"quantizations": [
|
|
8997
|
+
"F16"
|
|
8998
|
+
],
|
|
8999
|
+
"model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
|
|
9000
|
+
"model_id": "THUDM/glm-edge-1.5b-chat-gguf"
|
|
9001
|
+
},
|
|
9002
|
+
{
|
|
9003
|
+
"model_format": "ggufv2",
|
|
9004
|
+
"model_size_in_billions": "4",
|
|
9005
|
+
"quantizations": [
|
|
9006
|
+
"Q4_0",
|
|
9007
|
+
"Q4_1",
|
|
9008
|
+
"Q4_K",
|
|
9009
|
+
"Q4_K_M",
|
|
9010
|
+
"Q4_K_S",
|
|
9011
|
+
"Q5_0",
|
|
9012
|
+
"Q5_1",
|
|
9013
|
+
"Q5_K",
|
|
9014
|
+
"Q5_K_M",
|
|
9015
|
+
"Q5_K_S",
|
|
9016
|
+
"Q6_K",
|
|
9017
|
+
"Q8_0"
|
|
9018
|
+
],
|
|
9019
|
+
"model_file_name_template": "ggml-model-{quantization}.gguf",
|
|
9020
|
+
"model_id": "THUDM/glm-edge-4b-chat-gguf"
|
|
9021
|
+
},
|
|
9022
|
+
{
|
|
9023
|
+
"model_format": "ggufv2",
|
|
9024
|
+
"model_size_in_billions": "4",
|
|
9025
|
+
"quantizations": [
|
|
9026
|
+
"F16"
|
|
9027
|
+
],
|
|
9028
|
+
"model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
|
|
9029
|
+
"model_id": "THUDM/glm-edge-4b-chat-gguf"
|
|
9030
|
+
}
|
|
9031
|
+
],
|
|
9032
|
+
"chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
|
|
9033
|
+
"stop_token_ids": [
|
|
9034
|
+
59246,
|
|
9035
|
+
59253,
|
|
9036
|
+
59255
|
|
9037
|
+
],
|
|
9038
|
+
"stop": [
|
|
9039
|
+
"<|endoftext|>",
|
|
9040
|
+
"<|user|>",
|
|
9041
|
+
"<|observation|>"
|
|
9042
|
+
]
|
|
9043
|
+
},
|
|
9044
|
+
{
|
|
9045
|
+
"version": 1,
|
|
9046
|
+
"context_length": 8192,
|
|
9047
|
+
"model_name": "glm-edge-v",
|
|
9048
|
+
"model_lang": [
|
|
9049
|
+
"en",
|
|
9050
|
+
"zh"
|
|
9051
|
+
],
|
|
9052
|
+
"model_ability": [
|
|
9053
|
+
"chat",
|
|
9054
|
+
"vision"
|
|
9055
|
+
],
|
|
9056
|
+
"model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
|
|
9057
|
+
"model_specs": [
|
|
9058
|
+
{
|
|
9059
|
+
"model_format": "pytorch",
|
|
9060
|
+
"model_size_in_billions": "2",
|
|
9061
|
+
"quantizations": [
|
|
9062
|
+
"4-bit",
|
|
9063
|
+
"8-bit",
|
|
9064
|
+
"none"
|
|
9065
|
+
],
|
|
9066
|
+
"model_id": "THUDM/glm-edge-v-2b"
|
|
9067
|
+
},
|
|
9068
|
+
{
|
|
9069
|
+
"model_format": "pytorch",
|
|
9070
|
+
"model_size_in_billions": "5",
|
|
9071
|
+
"quantizations": [
|
|
9072
|
+
"4-bit",
|
|
9073
|
+
"8-bit",
|
|
9074
|
+
"none"
|
|
9075
|
+
],
|
|
9076
|
+
"model_id": "THUDM/glm-edge-v-5b"
|
|
9077
|
+
},
|
|
9078
|
+
{
|
|
9079
|
+
"model_format": "ggufv2",
|
|
9080
|
+
"model_size_in_billions": "2",
|
|
9081
|
+
"quantizations": [
|
|
9082
|
+
"Q4_0",
|
|
9083
|
+
"Q4_1",
|
|
9084
|
+
"Q4_K",
|
|
9085
|
+
"Q4_K_M",
|
|
9086
|
+
"Q4_K_S",
|
|
9087
|
+
"Q5_0",
|
|
9088
|
+
"Q5_1",
|
|
9089
|
+
"Q5_K",
|
|
9090
|
+
"Q5_K_M",
|
|
9091
|
+
"Q5_K_S",
|
|
9092
|
+
"Q6_K",
|
|
9093
|
+
"Q8_0"
|
|
9094
|
+
],
|
|
9095
|
+
"model_file_name_template": "ggml-model-{quantization}.gguf",
|
|
9096
|
+
"model_id": "THUDM/glm-edge-v-2b-gguf"
|
|
9097
|
+
},
|
|
9098
|
+
{
|
|
9099
|
+
"model_format": "ggufv2",
|
|
9100
|
+
"model_size_in_billions": "2",
|
|
9101
|
+
"quantizations": [
|
|
9102
|
+
"F16"
|
|
9103
|
+
],
|
|
9104
|
+
"model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
|
|
9105
|
+
"model_id": "THUDM/glm-edge-v-2b-gguf"
|
|
9106
|
+
},
|
|
9107
|
+
{
|
|
9108
|
+
"model_format": "ggufv2",
|
|
9109
|
+
"model_size_in_billions": "2",
|
|
9110
|
+
"quantizations": [
|
|
9111
|
+
"f16"
|
|
9112
|
+
],
|
|
9113
|
+
"model_file_name_template": "mmproj-model-{quantization}.gguf",
|
|
9114
|
+
"model_id": "THUDM/glm-edge-v-2b-gguf"
|
|
9115
|
+
},
|
|
9116
|
+
{
|
|
9117
|
+
"model_format": "ggufv2",
|
|
9118
|
+
"model_size_in_billions": "5",
|
|
9119
|
+
"quantizations": [
|
|
9120
|
+
"Q4_0",
|
|
9121
|
+
"Q4_1",
|
|
9122
|
+
"Q4_K",
|
|
9123
|
+
"Q4_K_M",
|
|
9124
|
+
"Q4_K_S",
|
|
9125
|
+
"Q5_0",
|
|
9126
|
+
"Q5_1",
|
|
9127
|
+
"Q5_K",
|
|
9128
|
+
"Q5_K_M",
|
|
9129
|
+
"Q5_K_S",
|
|
9130
|
+
"Q6_K",
|
|
9131
|
+
"Q8_0"
|
|
9132
|
+
],
|
|
9133
|
+
"model_file_name_template": "ggml-model-{quantization}.gguf",
|
|
9134
|
+
"model_id": "THUDM/glm-edge-v-5b-gguf"
|
|
9135
|
+
},
|
|
9136
|
+
{
|
|
9137
|
+
"model_format": "ggufv2",
|
|
9138
|
+
"model_size_in_billions": "5",
|
|
9139
|
+
"quantizations": [
|
|
9140
|
+
"F16"
|
|
9141
|
+
],
|
|
9142
|
+
"model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
|
|
9143
|
+
"model_id": "THUDM/glm-edge-v-5b-gguf"
|
|
9144
|
+
},
|
|
9145
|
+
{
|
|
9146
|
+
"model_format": "ggufv2",
|
|
9147
|
+
"model_size_in_billions": "5",
|
|
9148
|
+
"quantizations": [
|
|
9149
|
+
"f16"
|
|
9150
|
+
],
|
|
9151
|
+
"model_file_name_template": "mmproj-model-{quantization}.gguf",
|
|
9152
|
+
"model_id": "THUDM/glm-edge-v-5b-gguf"
|
|
9153
|
+
}
|
|
9154
|
+
],
|
|
9155
|
+
"chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
|
|
9156
|
+
"stop_token_ids": [
|
|
9157
|
+
59246,
|
|
9158
|
+
59253,
|
|
9159
|
+
59255
|
|
9160
|
+
],
|
|
9161
|
+
"stop": [
|
|
9162
|
+
"<|endoftext|>",
|
|
9163
|
+
"<|user|>",
|
|
9164
|
+
"<|observation|>"
|
|
9165
|
+
]
|
|
9166
|
+
},
|
|
9167
|
+
{
|
|
9168
|
+
"version": 1,
|
|
9169
|
+
"context_length": 32768,
|
|
9170
|
+
"model_name": "QvQ-72B-Preview",
|
|
9171
|
+
"model_lang": [
|
|
9172
|
+
"en",
|
|
9173
|
+
"zh"
|
|
9174
|
+
],
|
|
9175
|
+
"model_ability": [
|
|
9176
|
+
"chat",
|
|
9177
|
+
"vision"
|
|
9178
|
+
],
|
|
9179
|
+
"model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
|
|
9180
|
+
"model_specs": [
|
|
9181
|
+
{
|
|
9182
|
+
"model_format": "pytorch",
|
|
9183
|
+
"model_size_in_billions": 72,
|
|
9184
|
+
"quantizations": [
|
|
9185
|
+
"4-bit",
|
|
9186
|
+
"8-bit",
|
|
9187
|
+
"none"
|
|
9188
|
+
],
|
|
9189
|
+
"model_id": "Qwen/QVQ-72B-Preview"
|
|
9190
|
+
},
|
|
9191
|
+
{
|
|
9192
|
+
"model_format": "mlx",
|
|
9193
|
+
"model_size_in_billions": 72,
|
|
9194
|
+
"quantizations": [
|
|
9195
|
+
"3bit",
|
|
9196
|
+
"4bit",
|
|
9197
|
+
"6bit",
|
|
9198
|
+
"8bit",
|
|
9199
|
+
"bf16"
|
|
9200
|
+
],
|
|
9201
|
+
"model_id": "mlx-community/QVQ-72B-Preview-{quantization}"
|
|
9202
|
+
}
|
|
9203
|
+
],
|
|
9204
|
+
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
9205
|
+
"stop_token_ids": [
|
|
9206
|
+
151645,
|
|
9207
|
+
151643
|
|
9208
|
+
],
|
|
9209
|
+
"stop": [
|
|
9210
|
+
"<|im_end|>",
|
|
9211
|
+
"<|endoftext|>"
|
|
9212
|
+
]
|
|
9213
|
+
},
|
|
9214
|
+
{
|
|
9215
|
+
"version": 1,
|
|
9216
|
+
"context_length": 32768,
|
|
9217
|
+
"model_name": "marco-o1",
|
|
9218
|
+
"model_lang": [
|
|
9219
|
+
"en",
|
|
9220
|
+
"zh"
|
|
9221
|
+
],
|
|
9222
|
+
"model_ability": [
|
|
9223
|
+
"chat",
|
|
9224
|
+
"tools"
|
|
9225
|
+
],
|
|
9226
|
+
"model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
|
|
9227
|
+
"model_specs": [
|
|
9228
|
+
{
|
|
9229
|
+
"model_format": "pytorch",
|
|
9230
|
+
"model_size_in_billions": 7,
|
|
9231
|
+
"quantizations": [
|
|
9232
|
+
"4-bit",
|
|
9233
|
+
"8-bit",
|
|
9234
|
+
"none"
|
|
9235
|
+
],
|
|
9236
|
+
"model_id": "AIDC-AI/Marco-o1"
|
|
9237
|
+
},
|
|
9238
|
+
{
|
|
9239
|
+
"model_format": "ggufv2",
|
|
9240
|
+
"model_size_in_billions": 7,
|
|
9241
|
+
"quantizations": [
|
|
9242
|
+
"Q2_K",
|
|
9243
|
+
"Q3_K_L",
|
|
9244
|
+
"Q3_K_M",
|
|
9245
|
+
"Q3_K_S",
|
|
9246
|
+
"Q4_0",
|
|
9247
|
+
"Q4_1",
|
|
9248
|
+
"Q4_K_M",
|
|
9249
|
+
"Q4_K_S",
|
|
9250
|
+
"Q5_0",
|
|
9251
|
+
"Q5_1",
|
|
9252
|
+
"Q5_K_M",
|
|
9253
|
+
"Q5_K_S",
|
|
9254
|
+
"Q6_K",
|
|
9255
|
+
"Q8_0"
|
|
9256
|
+
],
|
|
9257
|
+
"model_id": "QuantFactory/Marco-o1-GGUF",
|
|
9258
|
+
"model_file_name_template": "Marco-o1.{quantization}.gguf"
|
|
9259
|
+
}
|
|
9260
|
+
],
|
|
9261
|
+
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
9262
|
+
"stop_token_ids": [
|
|
9263
|
+
151643,
|
|
9264
|
+
151644,
|
|
9265
|
+
151645
|
|
9266
|
+
],
|
|
9267
|
+
"stop": [
|
|
9268
|
+
"<|endoftext|>",
|
|
9269
|
+
"<|im_start|>",
|
|
9270
|
+
"<|im_end|>"
|
|
9271
|
+
]
|
|
9272
|
+
},
|
|
9273
|
+
{
|
|
9274
|
+
"version": 1,
|
|
9275
|
+
"context_length": 4096,
|
|
9276
|
+
"model_name": "cogagent",
|
|
9277
|
+
"model_lang": [
|
|
9278
|
+
"en",
|
|
9279
|
+
"zh"
|
|
9280
|
+
],
|
|
9281
|
+
"model_ability": [
|
|
9282
|
+
"chat",
|
|
9283
|
+
"vision"
|
|
9284
|
+
],
|
|
9285
|
+
"model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability. ",
|
|
9286
|
+
"model_specs": [
|
|
9287
|
+
{
|
|
9288
|
+
"model_format": "pytorch",
|
|
9289
|
+
"model_size_in_billions": "9",
|
|
9290
|
+
"quantizations": [
|
|
9291
|
+
"4-bit",
|
|
9292
|
+
"8-bit",
|
|
9293
|
+
"none"
|
|
9294
|
+
],
|
|
9295
|
+
"model_id": "THUDM/cogagent-9b-20241220"
|
|
9296
|
+
}
|
|
9297
|
+
],
|
|
9298
|
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
|
9299
|
+
"stop_token_ids": [
|
|
9300
|
+
151329,
|
|
9301
|
+
151336,
|
|
9302
|
+
151338
|
|
9303
|
+
],
|
|
9304
|
+
"stop": [
|
|
9305
|
+
"<|endoftext|>",
|
|
9306
|
+
"<|user|>",
|
|
9307
|
+
"<|observation|>"
|
|
9308
|
+
]
|
|
8347
9309
|
}
|
|
8348
9310
|
]
|