xinference 0.16.3__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/_compat.py +24 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +219 -77
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/chat_interface.py +6 -1
- xinference/core/model.py +124 -34
- xinference/core/supervisor.py +180 -12
- xinference/core/utils.py +73 -4
- xinference/core/worker.py +102 -4
- xinference/deploy/cmdline.py +3 -1
- xinference/deploy/test/test_cmdline.py +56 -0
- xinference/isolation.py +24 -0
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +37 -4
- xinference/model/audio/cosyvoice.py +39 -6
- xinference/model/audio/f5tts.py +200 -0
- xinference/model/audio/f5tts_mlx.py +260 -0
- xinference/model/audio/fish_speech.py +70 -110
- xinference/model/audio/melotts.py +110 -0
- xinference/model/audio/model_spec.json +179 -3
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/audio/utils.py +32 -0
- xinference/model/audio/whisper.py +35 -10
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +322 -6
- xinference/model/embedding/model_spec.json +8 -1
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +69 -1
- xinference/model/image/model_spec.json +145 -4
- xinference/model/image/model_spec_modelscope.json +150 -4
- xinference/model/image/stable_diffusion/core.py +50 -15
- xinference/model/llm/__init__.py +6 -2
- xinference/model/llm/llm_family.json +1055 -93
- xinference/model/llm/llm_family.py +15 -36
- xinference/model/llm/llm_family_modelscope.json +1031 -78
- xinference/model/llm/memory.py +1 -1
- xinference/model/llm/mlx/core.py +285 -47
- xinference/model/llm/sglang/core.py +2 -0
- xinference/model/llm/transformers/chatglm.py +9 -5
- xinference/model/llm/transformers/cogagent.py +272 -0
- xinference/model/llm/transformers/core.py +3 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/transformers/qwen2_vl.py +12 -1
- xinference/model/llm/transformers/utils.py +16 -8
- xinference/model/llm/utils.py +55 -4
- xinference/model/llm/vllm/core.py +137 -12
- xinference/model/llm/vllm/xavier/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/allocator.py +74 -0
- xinference/model/llm/vllm/xavier/block.py +111 -0
- xinference/model/llm/vllm/xavier/block_manager.py +71 -0
- xinference/model/llm/vllm/xavier/block_tracker.py +129 -0
- xinference/model/llm/vllm/xavier/collective.py +74 -0
- xinference/model/llm/vllm/xavier/collective_manager.py +147 -0
- xinference/model/llm/vllm/xavier/engine.py +247 -0
- xinference/model/llm/vllm/xavier/executor.py +134 -0
- xinference/model/llm/vllm/xavier/scheduler.py +438 -0
- xinference/model/llm/vllm/xavier/test/__init__.py +13 -0
- xinference/model/llm/vllm/xavier/test/test_xavier.py +147 -0
- xinference/model/llm/vllm/xavier/transfer.py +319 -0
- xinference/model/rerank/core.py +11 -4
- xinference/model/video/diffusers.py +14 -0
- xinference/model/video/model_spec.json +15 -0
- xinference/model/video/model_spec_modelscope.json +16 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +92 -0
- xinference/thirdparty/cosyvoice/bin/export_jit.py +12 -2
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +112 -0
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +9 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +5 -7
- xinference/thirdparty/cosyvoice/bin/spk2info.pt +0 -0
- xinference/thirdparty/cosyvoice/bin/train.py +42 -8
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +96 -25
- xinference/thirdparty/cosyvoice/cli/frontend.py +77 -30
- xinference/thirdparty/cosyvoice/cli/model.py +330 -80
- xinference/thirdparty/cosyvoice/dataset/dataset.py +6 -2
- xinference/thirdparty/cosyvoice/dataset/processor.py +76 -14
- xinference/thirdparty/cosyvoice/flow/decoder.py +92 -13
- xinference/thirdparty/cosyvoice/flow/flow.py +99 -9
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +110 -13
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +5 -4
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +140 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +58 -42
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +67 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +139 -6
- xinference/thirdparty/cosyvoice/tokenizer/assets/multilingual_zh_ja_yue_char_del.tiktoken +58836 -0
- xinference/thirdparty/cosyvoice/tokenizer/tokenizer.py +279 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +2 -2
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +7 -7
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +318 -0
- xinference/thirdparty/cosyvoice/utils/common.py +28 -1
- xinference/thirdparty/cosyvoice/utils/executor.py +69 -7
- xinference/thirdparty/cosyvoice/utils/file_utils.py +2 -12
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +9 -5
- xinference/thirdparty/cosyvoice/utils/losses.py +20 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +1 -2
- xinference/thirdparty/cosyvoice/utils/train_utils.py +101 -45
- xinference/thirdparty/f5_tts/api.py +166 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
- xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
- xinference/thirdparty/f5_tts/eval/README.md +49 -0
- xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
- xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
- xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
- xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
- xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
- xinference/thirdparty/f5_tts/infer/README.md +191 -0
- xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
- xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
- xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
- xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
- xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
- xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
- xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
- xinference/thirdparty/f5_tts/model/__init__.py +10 -0
- xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
- xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
- xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
- xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
- xinference/thirdparty/f5_tts/model/cfm.py +285 -0
- xinference/thirdparty/f5_tts/model/dataset.py +319 -0
- xinference/thirdparty/f5_tts/model/modules.py +658 -0
- xinference/thirdparty/f5_tts/model/trainer.py +366 -0
- xinference/thirdparty/f5_tts/model/utils.py +185 -0
- xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
- xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
- xinference/thirdparty/f5_tts/socket_server.py +159 -0
- xinference/thirdparty/f5_tts/train/README.md +77 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
- xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
- xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
- xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
- xinference/thirdparty/f5_tts/train/train.py +75 -0
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +266 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +137 -29
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +17 -11
- xinference/thirdparty/fish_speech/fish_speech/text/spliter.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/tokenizer.py +152 -0
- xinference/thirdparty/fish_speech/fish_speech/train.py +2 -2
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +2 -2
- xinference/thirdparty/fish_speech/tools/{post_api.py → api_client.py} +34 -18
- xinference/thirdparty/fish_speech/tools/api_server.py +98 -0
- xinference/thirdparty/fish_speech/tools/download_models.py +5 -5
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/__init__.py +192 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/reference_loader.py +125 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/utils.py +39 -0
- xinference/thirdparty/fish_speech/tools/inference_engine/vq_manager.py +57 -0
- xinference/thirdparty/fish_speech/tools/llama/eval_in_context.py +2 -2
- xinference/thirdparty/fish_speech/tools/llama/generate.py +484 -72
- xinference/thirdparty/fish_speech/tools/run_webui.py +104 -0
- xinference/thirdparty/fish_speech/tools/schema.py +170 -0
- xinference/thirdparty/fish_speech/tools/server/agent/__init__.py +57 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generate.py +119 -0
- xinference/thirdparty/fish_speech/tools/server/agent/generation_utils.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/agent/pre_generation_utils.py +72 -0
- xinference/thirdparty/fish_speech/tools/server/api_utils.py +75 -0
- xinference/thirdparty/fish_speech/tools/server/exception_handler.py +27 -0
- xinference/thirdparty/fish_speech/tools/server/inference.py +45 -0
- xinference/thirdparty/fish_speech/tools/server/model_manager.py +122 -0
- xinference/thirdparty/fish_speech/tools/server/model_utils.py +129 -0
- xinference/thirdparty/fish_speech/tools/server/views.py +246 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui/__init__.py +173 -0
- xinference/thirdparty/fish_speech/tools/webui/inference.py +91 -0
- xinference/thirdparty/fish_speech/tools/webui/variables.py +14 -0
- xinference/thirdparty/matcha/utils/utils.py +2 -2
- xinference/thirdparty/melo/api.py +135 -0
- xinference/thirdparty/melo/app.py +61 -0
- xinference/thirdparty/melo/attentions.py +459 -0
- xinference/thirdparty/melo/commons.py +160 -0
- xinference/thirdparty/melo/configs/config.json +94 -0
- xinference/thirdparty/melo/data/example/metadata.list +20 -0
- xinference/thirdparty/melo/data_utils.py +413 -0
- xinference/thirdparty/melo/download_utils.py +67 -0
- xinference/thirdparty/melo/infer.py +25 -0
- xinference/thirdparty/melo/init_downloads.py +14 -0
- xinference/thirdparty/melo/losses.py +58 -0
- xinference/thirdparty/melo/main.py +36 -0
- xinference/thirdparty/melo/mel_processing.py +174 -0
- xinference/thirdparty/melo/models.py +1030 -0
- xinference/thirdparty/melo/modules.py +598 -0
- xinference/thirdparty/melo/monotonic_align/__init__.py +16 -0
- xinference/thirdparty/melo/monotonic_align/core.py +46 -0
- xinference/thirdparty/melo/preprocess_text.py +135 -0
- xinference/thirdparty/melo/split_utils.py +174 -0
- xinference/thirdparty/melo/text/__init__.py +35 -0
- xinference/thirdparty/melo/text/chinese.py +199 -0
- xinference/thirdparty/melo/text/chinese_bert.py +107 -0
- xinference/thirdparty/melo/text/chinese_mix.py +253 -0
- xinference/thirdparty/melo/text/cleaner.py +36 -0
- xinference/thirdparty/melo/text/cleaner_multiling.py +110 -0
- xinference/thirdparty/melo/text/cmudict.rep +129530 -0
- xinference/thirdparty/melo/text/cmudict_cache.pickle +0 -0
- xinference/thirdparty/melo/text/english.py +284 -0
- xinference/thirdparty/melo/text/english_bert.py +39 -0
- xinference/thirdparty/melo/text/english_utils/abbreviations.py +35 -0
- xinference/thirdparty/melo/text/english_utils/number_norm.py +97 -0
- xinference/thirdparty/melo/text/english_utils/time_norm.py +47 -0
- xinference/thirdparty/melo/text/es_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/es_phonemizer/cleaner.py +109 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.json +79 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_symbols_v2.json +83 -0
- xinference/thirdparty/melo/text/es_phonemizer/es_to_ipa.py +12 -0
- xinference/thirdparty/melo/text/es_phonemizer/example_ipa.txt +400 -0
- xinference/thirdparty/melo/text/es_phonemizer/gruut_wrapper.py +253 -0
- xinference/thirdparty/melo/text/es_phonemizer/punctuation.py +174 -0
- xinference/thirdparty/melo/text/es_phonemizer/spanish_symbols.txt +1 -0
- xinference/thirdparty/melo/text/es_phonemizer/test.ipynb +124 -0
- xinference/thirdparty/melo/text/fr_phonemizer/base.py +140 -0
- xinference/thirdparty/melo/text/fr_phonemizer/cleaner.py +122 -0
- xinference/thirdparty/melo/text/fr_phonemizer/en_symbols.json +78 -0
- xinference/thirdparty/melo/text/fr_phonemizer/example_ipa.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_symbols.json +89 -0
- xinference/thirdparty/melo/text/fr_phonemizer/fr_to_ipa.py +30 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_abbreviations.py +48 -0
- xinference/thirdparty/melo/text/fr_phonemizer/french_symbols.txt +1 -0
- xinference/thirdparty/melo/text/fr_phonemizer/gruut_wrapper.py +258 -0
- xinference/thirdparty/melo/text/fr_phonemizer/punctuation.py +172 -0
- xinference/thirdparty/melo/text/french.py +94 -0
- xinference/thirdparty/melo/text/french_bert.py +39 -0
- xinference/thirdparty/melo/text/japanese.py +647 -0
- xinference/thirdparty/melo/text/japanese_bert.py +49 -0
- xinference/thirdparty/melo/text/ko_dictionary.py +44 -0
- xinference/thirdparty/melo/text/korean.py +192 -0
- xinference/thirdparty/melo/text/opencpop-strict.txt +429 -0
- xinference/thirdparty/melo/text/spanish.py +122 -0
- xinference/thirdparty/melo/text/spanish_bert.py +39 -0
- xinference/thirdparty/melo/text/symbols.py +290 -0
- xinference/thirdparty/melo/text/tone_sandhi.py +769 -0
- xinference/thirdparty/melo/train.py +635 -0
- xinference/thirdparty/melo/train.sh +19 -0
- xinference/thirdparty/melo/transforms.py +209 -0
- xinference/thirdparty/melo/utils.py +424 -0
- xinference/types.py +17 -1
- xinference/web/ui/build/asset-manifest.json +6 -6
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/css/main.51a587ff.css +2 -0
- xinference/web/ui/build/static/css/main.51a587ff.css.map +1 -0
- xinference/web/ui/build/static/js/main.b0936c54.js +3 -0
- xinference/web/ui/build/static/js/main.b0936c54.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/03c4052f1b91f6ba0c5389bdcf49c43319b4076c08e4b8585dab312538ae290a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/1786b83003b8e9605a0f5f855a185d4d16e38fc893dfb326a2a9cca206b4240a.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/17cbc181dd674b9150b80c73ed6a82656de0082d857f6e5f66d9716129ac0b38.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/26b8c9f34b0bed789b3a833767672e39302d1e0c09b4276f4d58d1df7b6bd93b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2b484da66c724d0d56a40849c109327408796a668b1381511b6e9e03baa48658.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2cbbbce9b84df73330d4c42b82436ed881b3847628f2fbc346aa62e2859fd88c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2ec9b14431ed33ce6901bf9f27007be4e6e472709c99d6e22b50ce528e4b78ee.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3b966db018f96be4a055d6ca205f0990d4d0b370e2980c17d8bca2c9a021819c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/522b229e3cac219123f0d69673f5570e191c2d2a505dc65b312d336eae2279c0.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/52e45f17ba300580ea3fcc9f9228ccba194bb092b76f25e9255af311f8b05aab.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/5a0bc4631f936459afc1a3b1d3ec2420118b1f00e11f60ccac3e08088f3f27a8.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/611fa2c6c53b66039991d06dfb0473b5ab37fc63b4564e0f6e1718523768a045.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/6329bc76c406fe5eb305412383fbde5950f847bb5e43261f73f37622c365acb4.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/69b2d5001684174ec9da57e07914eed3eac4960018bceb6cbfa801d861301d7c.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/710c1acda69e561e30a933b98c6a56d50197868b15c21e2aad55ab6d46649eb6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/720deca1fce5a1dc5056048fa8258fd138a82ea855f350b6613f104a73fb761f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/76a23b92d26a499c57e61eea2b895fbc9771bd0849a72e66f8e633192017978b.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/858063f23b34dfe600254eb5afd85518b0002ec4b30b7386616c45600826e3b2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/920b82c1c89124cf217109eeedbfcd3aae3b917be50c9dfb6bbb4ce26bdfd2e7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/94d8b7aeb0076f2ce07db598cea0e87b13bc8d5614eb530b8d6e696c2daf6f88.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9e917fe7022d01b2ccbe5cc0ce73d70bb72bee584ff293bad71bdff6695dee28.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/9f28fdb8399f1d0474f0aca86f1658dc94f5bf0c90f6146352de150692de8862.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a0dfafa06b2bb7cba8cad41c482503f61944f759f4318139362602ef5cc47ccb.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/afb8084f539534cd594755ea2205ecd5bd1f62dddcfdf75a2eace59a28131278.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b57b1438b77294c1f3f6cfce12ac487d8106c6f016975ba0aec94d98997e2e1e.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b9917b0bf8e4d55ccbac1c334aa04d6ff3c5b6ed9e5d38b9ea2c687fa7d3f5a9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bbcc94b0149963d1d6f267ee1f4f03d3925b758392ce2f516c3fe8af0e0169fc.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/beb112b70f4a56db95920a9e20efb6c97c37b68450716730217a9ee1a9ae92be.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/c88db97be0cdf440193b3995996e83510a04cb00048135485fc0e26d197e80b5.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d49e5314d34310a62d01a03067ce1bec5da00abce84c5196aa9c6842fa79a430.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/d9072c318b819b7c90a0f7e9cc0b6413b4dbeb8e9859898e53d75ea882fcde99.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/db16a983bc08a05f0439cc61ca0840e49e1d8400eef678909f16c032a418a3d6.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/dc249829767b8abcbc3677e0b07b6d3ecbfdfe6d08cfe23a665eb33373a9aa9d.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/e242c583c2dbc2784f0fcf513523975f7d5df447e106c1c17e49e8578a6fc3ed.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/eac5f1296513e69e4b96f750ddccd4d0264e2bae4e4c449144e83274a48698d9.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f125bf72e773a14cdaebd0c343e80adb909d12e317ee5c00cd4a57442fbe2c62.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/f91af913d7f91c410719ab13136aaed3aaf0f8dda06652f25c42cb5231587398.json +1 -0
- xinference/web/ui/node_modules/.package-lock.json +67 -3
- xinference/web/ui/node_modules/@babel/runtime/package.json +592 -538
- xinference/web/ui/node_modules/html-parse-stringify/package.json +50 -0
- xinference/web/ui/node_modules/i18next/dist/esm/package.json +1 -0
- xinference/web/ui/node_modules/i18next/package.json +129 -0
- xinference/web/ui/node_modules/react-i18next/.eslintrc.json +74 -0
- xinference/web/ui/node_modules/react-i18next/dist/es/package.json +1 -0
- xinference/web/ui/node_modules/react-i18next/package.json +162 -0
- xinference/web/ui/node_modules/void-elements/package.json +34 -0
- xinference/web/ui/package-lock.json +69 -3
- xinference/web/ui/package.json +2 -0
- xinference/web/ui/src/locales/en.json +186 -0
- xinference/web/ui/src/locales/zh.json +186 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/METADATA +96 -36
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/RECORD +335 -146
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/cosyvoice/bin/export_trt.py +0 -8
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/api.py +0 -440
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +0 -34
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/webui.py +0 -485
- xinference/web/ui/build/static/css/main.5061c4c3.css +0 -2
- xinference/web/ui/build/static/css/main.5061c4c3.css.map +0 -1
- xinference/web/ui/build/static/js/main.2f269bb3.js +0 -3
- xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/07ce9e632e6aff24d7aa3ad8e48224433bbfeb0d633fca723453f1fcae0c9f1c.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1130403f9e46f5738a23b45ac59b57de8f360c908c713e2c0670c2cce9bd367a.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/131091b25d26b17cdca187d7542a21475c211138d900cf667682260e76ef9463.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/1f269fb2a368363c1cb2237825f1dba093b6bdd8c44cc05954fd19ec2c1fff03.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/331312668fa8bd3d7401818f4a25fa98135d7f61371cd6bfff78b18cf4fbdd92.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/40f17338fc75ae095de7d2b4d8eae0d5ca0193a7e2bcece4ee745b22a7a2f4b7.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/4de9a6942c5f1749d6cbfdd54279699975f16016b182848bc253886f52ec2ec3.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/822586ed1077201b64b954f12f25e3f9b45678c1acbabe53d8af3ca82ca71f33.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8d33354bd2100c8602afc3341f131a88cc36aaeecd5a4b365ed038514708e350.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/9375a35b05d56989b2755bf72161fa707c92f28569d33765a75f91a568fda6e9.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/a158a9ffa0c9b169aee53dd4a0c44501a596755b4e4f6ede7746d65a72e2a71f.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/c7bf40bab396765f67d0fed627ed3665890608b2d0edaa3e8cb7cfc96310db45.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/d6c643278a0b28320e6f33a60f5fb64c053997cbdc39a60e53ccc574688ade9e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e42b72d4cc1ea412ebecbb8d040dc6c6bfee462c33903c2f1f3facb602ad742e.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/e64b7e8cedcf43d4c95deba60ec1341855c887705805bb62431693118b870c69.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f5039ddbeb815c51491a1989532006b96fc3ae49c6c60e3c097f875b4ae915ae.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f72f011744c4649fabddca6f7a9327861ac0a315a89b1a2e62a39774e7863845.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/feabb04b4aa507102da0a64398a40818e878fd1df9b75dda8461b3e1e7ff3f11.json +0 -1
- /xinference/thirdparty/{cosyvoice/bin → f5_tts}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/flow → melo}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/hifigan → melo/text/english_utils}/__init__.py +0 -0
- /xinference/thirdparty/{cosyvoice/llm → melo/text/es_phonemizer}/__init__.py +0 -0
- /xinference/thirdparty/{fish_speech/fish_speech/configs → melo/text/fr_phonemizer}/__init__.py +0 -0
- /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.b0936c54.js.LICENSE.txt} +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.2.1.dist-info}/top_level.txt +0 -0
|
@@ -454,6 +454,72 @@
|
|
|
454
454
|
}
|
|
455
455
|
]
|
|
456
456
|
},
|
|
457
|
+
{
|
|
458
|
+
"version": 1,
|
|
459
|
+
"context_length": 131072,
|
|
460
|
+
"model_name": "llama-3.3-instruct",
|
|
461
|
+
"model_lang": [
|
|
462
|
+
"en",
|
|
463
|
+
"de",
|
|
464
|
+
"fr",
|
|
465
|
+
"it",
|
|
466
|
+
"pt",
|
|
467
|
+
"hi",
|
|
468
|
+
"es",
|
|
469
|
+
"th"
|
|
470
|
+
],
|
|
471
|
+
"model_ability": [
|
|
472
|
+
"chat",
|
|
473
|
+
"tools"
|
|
474
|
+
],
|
|
475
|
+
"model_description": "The Llama 3.3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
|
|
476
|
+
"model_specs": [
|
|
477
|
+
{
|
|
478
|
+
"model_format": "pytorch",
|
|
479
|
+
"model_size_in_billions": 70,
|
|
480
|
+
"quantizations": [
|
|
481
|
+
"none"
|
|
482
|
+
],
|
|
483
|
+
"model_id": "LLM-Research/Llama-3.3-70B-Instruct",
|
|
484
|
+
"model_hub": "modelscope"
|
|
485
|
+
},
|
|
486
|
+
{
|
|
487
|
+
"model_format": "ggufv2",
|
|
488
|
+
"model_size_in_billions": 70,
|
|
489
|
+
"quantizations": [
|
|
490
|
+
"Q3_K_L",
|
|
491
|
+
"Q4_K_M",
|
|
492
|
+
"Q6_K",
|
|
493
|
+
"Q8_0"
|
|
494
|
+
],
|
|
495
|
+
"quantization_parts": {
|
|
496
|
+
"Q6_K": [
|
|
497
|
+
"00001-of-00002",
|
|
498
|
+
"00002-of-00002"
|
|
499
|
+
],
|
|
500
|
+
"Q8_0": [
|
|
501
|
+
"00001-of-00002",
|
|
502
|
+
"00002-of-00002"
|
|
503
|
+
]
|
|
504
|
+
},
|
|
505
|
+
"model_id": "lmstudio-community/Llama-3.3-70B-Instruct-GGUF",
|
|
506
|
+
"model_file_name_template": "Llama-3.3-70B-Instruct-{quantization}.gguf",
|
|
507
|
+
"model_file_name_split_template": "Llama-3.3-70B-Instruct-{quantization}-{part}.gguf",
|
|
508
|
+
"model_hub": "modelscope"
|
|
509
|
+
}
|
|
510
|
+
],
|
|
511
|
+
"chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
|
|
512
|
+
"stop_token_ids": [
|
|
513
|
+
128001,
|
|
514
|
+
128008,
|
|
515
|
+
128009
|
|
516
|
+
],
|
|
517
|
+
"stop": [
|
|
518
|
+
"<|end_of_text|>",
|
|
519
|
+
"<|eot_id|>",
|
|
520
|
+
"<|eom_id|>"
|
|
521
|
+
]
|
|
522
|
+
},
|
|
457
523
|
{
|
|
458
524
|
"version": 1,
|
|
459
525
|
"context_length": 2048,
|
|
@@ -586,7 +652,7 @@
|
|
|
586
652
|
"none"
|
|
587
653
|
],
|
|
588
654
|
"model_hub": "modelscope",
|
|
589
|
-
"model_id": "ZhipuAI/glm-4-9b-chat",
|
|
655
|
+
"model_id": "ZhipuAI/glm-4-9b-chat-hf",
|
|
590
656
|
"model_revision": "master"
|
|
591
657
|
},
|
|
592
658
|
{
|
|
@@ -652,7 +718,7 @@
|
|
|
652
718
|
"none"
|
|
653
719
|
],
|
|
654
720
|
"model_hub": "modelscope",
|
|
655
|
-
"model_id": "ZhipuAI/glm-4-9b-chat-1m",
|
|
721
|
+
"model_id": "ZhipuAI/glm-4-9b-chat-1m-hf",
|
|
656
722
|
"model_revision": "master"
|
|
657
723
|
},
|
|
658
724
|
{
|
|
@@ -2837,7 +2903,7 @@
|
|
|
2837
2903
|
"model_format": "mlx",
|
|
2838
2904
|
"model_size_in_billions": "0_5",
|
|
2839
2905
|
"quantizations": [
|
|
2840
|
-
"
|
|
2906
|
+
"4bit"
|
|
2841
2907
|
],
|
|
2842
2908
|
"model_id": "qwen/Qwen2-0.5B-Instruct-MLX",
|
|
2843
2909
|
"model_hub": "modelscope"
|
|
@@ -2846,7 +2912,7 @@
|
|
|
2846
2912
|
"model_format": "mlx",
|
|
2847
2913
|
"model_size_in_billions": "1_5",
|
|
2848
2914
|
"quantizations": [
|
|
2849
|
-
"
|
|
2915
|
+
"4bit"
|
|
2850
2916
|
],
|
|
2851
2917
|
"model_id": "qwen/Qwen2-1.5B-Instruct-MLX",
|
|
2852
2918
|
"model_hub": "modelscope"
|
|
@@ -2855,7 +2921,7 @@
|
|
|
2855
2921
|
"model_format": "mlx",
|
|
2856
2922
|
"model_size_in_billions": 7,
|
|
2857
2923
|
"quantizations": [
|
|
2858
|
-
"
|
|
2924
|
+
"4bit"
|
|
2859
2925
|
],
|
|
2860
2926
|
"model_id": "qwen/Qwen2-7B-Instruct-MLX",
|
|
2861
2927
|
"model_hub": "modelscope"
|
|
@@ -4649,6 +4715,16 @@
|
|
|
4649
4715
|
"model_id":"qwen/Qwen2-VL-7B-Instruct-AWQ",
|
|
4650
4716
|
"model_revision":"master"
|
|
4651
4717
|
},
|
|
4718
|
+
{
|
|
4719
|
+
"model_format":"mlx",
|
|
4720
|
+
"model_size_in_billions":7,
|
|
4721
|
+
"quantizations":[
|
|
4722
|
+
"8bit"
|
|
4723
|
+
],
|
|
4724
|
+
"model_hub": "modelscope",
|
|
4725
|
+
"model_id":"okwinds/Qwen2-VL-7B-Instruct-MLX-8bit",
|
|
4726
|
+
"model_revision":"master"
|
|
4727
|
+
},
|
|
4652
4728
|
{
|
|
4653
4729
|
"model_format":"pytorch",
|
|
4654
4730
|
"model_size_in_billions":2,
|
|
@@ -4689,6 +4765,16 @@
|
|
|
4689
4765
|
"model_id":"qwen/Qwen2-VL-2B-Instruct-AWQ",
|
|
4690
4766
|
"model_revision":"master"
|
|
4691
4767
|
},
|
|
4768
|
+
{
|
|
4769
|
+
"model_format":"mlx",
|
|
4770
|
+
"model_size_in_billions":2,
|
|
4771
|
+
"quantizations":[
|
|
4772
|
+
"8bit"
|
|
4773
|
+
],
|
|
4774
|
+
"model_hub": "modelscope",
|
|
4775
|
+
"model_id":"okwinds/Qwen2-VL-2B-Instruct-MLX-8bit",
|
|
4776
|
+
"model_revision":"master"
|
|
4777
|
+
},
|
|
4692
4778
|
{
|
|
4693
4779
|
"model_format":"pytorch",
|
|
4694
4780
|
"model_size_in_billions":72,
|
|
@@ -4716,6 +4802,17 @@
|
|
|
4716
4802
|
],
|
|
4717
4803
|
"model_id":"qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}",
|
|
4718
4804
|
"model_hub": "modelscope"
|
|
4805
|
+
},
|
|
4806
|
+
{
|
|
4807
|
+
"model_format":"mlx",
|
|
4808
|
+
"model_size_in_billions":72,
|
|
4809
|
+
"quantizations":[
|
|
4810
|
+
"4bit",
|
|
4811
|
+
"8bit"
|
|
4812
|
+
],
|
|
4813
|
+
"model_hub": "modelscope",
|
|
4814
|
+
"model_id":"okwinds/Qwen2-VL-72B-Instruct-MLX-{quantization}",
|
|
4815
|
+
"model_revision":"master"
|
|
4719
4816
|
}
|
|
4720
4817
|
],
|
|
4721
4818
|
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
|
|
@@ -5777,7 +5874,7 @@
|
|
|
5777
5874
|
"model_format": "mlx",
|
|
5778
5875
|
"model_size_in_billions": 3,
|
|
5779
5876
|
"quantizations": [
|
|
5780
|
-
"
|
|
5877
|
+
"4bit"
|
|
5781
5878
|
],
|
|
5782
5879
|
"model_id": "okwinds/Qwen2.5-3B-Instruct-MLX-4bit",
|
|
5783
5880
|
"model_hub": "modelscope"
|
|
@@ -5786,7 +5883,7 @@
|
|
|
5786
5883
|
"model_format": "mlx",
|
|
5787
5884
|
"model_size_in_billions": 3,
|
|
5788
5885
|
"quantizations": [
|
|
5789
|
-
"
|
|
5886
|
+
"8bit"
|
|
5790
5887
|
],
|
|
5791
5888
|
"model_id": "okwinds/Qwen2.5-3B-Instruct-MLX-8bit",
|
|
5792
5889
|
"model_hub": "modelscope"
|
|
@@ -5795,7 +5892,7 @@
|
|
|
5795
5892
|
"model_format": "mlx",
|
|
5796
5893
|
"model_size_in_billions": 7,
|
|
5797
5894
|
"quantizations": [
|
|
5798
|
-
"
|
|
5895
|
+
"4bit"
|
|
5799
5896
|
],
|
|
5800
5897
|
"model_id": "okwinds/Qwen2.5-7B-Instruct-MLX-4bit",
|
|
5801
5898
|
"model_hub": "modelscope"
|
|
@@ -5804,7 +5901,7 @@
|
|
|
5804
5901
|
"model_format": "mlx",
|
|
5805
5902
|
"model_size_in_billions": 7,
|
|
5806
5903
|
"quantizations": [
|
|
5807
|
-
"
|
|
5904
|
+
"8bit"
|
|
5808
5905
|
],
|
|
5809
5906
|
"model_id": "okwinds/Qwen2.5-7B-Instruct-MLX-8bit",
|
|
5810
5907
|
"model_hub": "modelscope"
|
|
@@ -5813,7 +5910,7 @@
|
|
|
5813
5910
|
"model_format": "mlx",
|
|
5814
5911
|
"model_size_in_billions": 14,
|
|
5815
5912
|
"quantizations": [
|
|
5816
|
-
"
|
|
5913
|
+
"4bit"
|
|
5817
5914
|
],
|
|
5818
5915
|
"model_id": "okwinds/Qwen2.5-14B-Instruct-MLX-4bit",
|
|
5819
5916
|
"model_hub": "modelscope"
|
|
@@ -5822,7 +5919,7 @@
|
|
|
5822
5919
|
"model_format": "mlx",
|
|
5823
5920
|
"model_size_in_billions": 14,
|
|
5824
5921
|
"quantizations": [
|
|
5825
|
-
"
|
|
5922
|
+
"8bit"
|
|
5826
5923
|
],
|
|
5827
5924
|
"model_id": "okwinds/Qwen2.5-14B-Instruct-MLX-8bit",
|
|
5828
5925
|
"model_hub": "modelscope"
|
|
@@ -5831,7 +5928,7 @@
|
|
|
5831
5928
|
"model_format": "mlx",
|
|
5832
5929
|
"model_size_in_billions": 32,
|
|
5833
5930
|
"quantizations": [
|
|
5834
|
-
"
|
|
5931
|
+
"2bit"
|
|
5835
5932
|
],
|
|
5836
5933
|
"model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-2bit",
|
|
5837
5934
|
"model_hub": "modelscope"
|
|
@@ -5840,7 +5937,7 @@
|
|
|
5840
5937
|
"model_format": "mlx",
|
|
5841
5938
|
"model_size_in_billions": 32,
|
|
5842
5939
|
"quantizations": [
|
|
5843
|
-
"
|
|
5940
|
+
"4bit"
|
|
5844
5941
|
],
|
|
5845
5942
|
"model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-4bit",
|
|
5846
5943
|
"model_hub": "modelscope"
|
|
@@ -5849,7 +5946,7 @@
|
|
|
5849
5946
|
"model_format": "mlx",
|
|
5850
5947
|
"model_size_in_billions": 32,
|
|
5851
5948
|
"quantizations": [
|
|
5852
|
-
"
|
|
5949
|
+
"8bit"
|
|
5853
5950
|
],
|
|
5854
5951
|
"model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-8bit",
|
|
5855
5952
|
"model_hub": "modelscope"
|
|
@@ -5858,7 +5955,7 @@
|
|
|
5858
5955
|
"model_format": "mlx",
|
|
5859
5956
|
"model_size_in_billions": 72,
|
|
5860
5957
|
"quantizations": [
|
|
5861
|
-
"
|
|
5958
|
+
"2bit"
|
|
5862
5959
|
],
|
|
5863
5960
|
"model_id": "okwinds/Qwen2.5-32B-Instruct-MLX-2bit",
|
|
5864
5961
|
"model_hub": "modelscope"
|
|
@@ -5867,7 +5964,7 @@
|
|
|
5867
5964
|
"model_format": "mlx",
|
|
5868
5965
|
"model_size_in_billions": 72,
|
|
5869
5966
|
"quantizations": [
|
|
5870
|
-
"
|
|
5967
|
+
"4bit"
|
|
5871
5968
|
],
|
|
5872
5969
|
"model_id": "okwinds/Qwen2.5-72B-Instruct-MLX-4bit",
|
|
5873
5970
|
"model_hub": "modelscope"
|
|
@@ -5876,7 +5973,7 @@
|
|
|
5876
5973
|
"model_format": "mlx",
|
|
5877
5974
|
"model_size_in_billions": 72,
|
|
5878
5975
|
"quantizations": [
|
|
5879
|
-
"
|
|
5976
|
+
"8bit"
|
|
5880
5977
|
],
|
|
5881
5978
|
"model_id": "okwinds/Qwen2.5-72B-Instruct-MLX-8bit",
|
|
5882
5979
|
"model_hub": "modelscope"
|
|
@@ -5907,6 +6004,18 @@
|
|
|
5907
6004
|
],
|
|
5908
6005
|
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
|
|
5909
6006
|
"model_specs": [
|
|
6007
|
+
{
|
|
6008
|
+
"model_format": "pytorch",
|
|
6009
|
+
"model_size_in_billions": "0_5",
|
|
6010
|
+
"quantizations": [
|
|
6011
|
+
"4-bit",
|
|
6012
|
+
"8-bit",
|
|
6013
|
+
"none"
|
|
6014
|
+
],
|
|
6015
|
+
"model_id": "qwen/Qwen2.5-Coder-0.5B",
|
|
6016
|
+
"model_revision": "master",
|
|
6017
|
+
"model_hub": "modelscope"
|
|
6018
|
+
},
|
|
5910
6019
|
{
|
|
5911
6020
|
"model_format": "pytorch",
|
|
5912
6021
|
"model_size_in_billions": "1_5",
|
|
@@ -5919,6 +6028,18 @@
|
|
|
5919
6028
|
"model_revision": "master",
|
|
5920
6029
|
"model_hub": "modelscope"
|
|
5921
6030
|
},
|
|
6031
|
+
{
|
|
6032
|
+
"model_format": "pytorch",
|
|
6033
|
+
"model_size_in_billions": "3",
|
|
6034
|
+
"quantizations": [
|
|
6035
|
+
"4-bit",
|
|
6036
|
+
"8-bit",
|
|
6037
|
+
"none"
|
|
6038
|
+
],
|
|
6039
|
+
"model_id": "qwen/Qwen2.5-Coder-3B",
|
|
6040
|
+
"model_revision": "master",
|
|
6041
|
+
"model_hub": "modelscope"
|
|
6042
|
+
},
|
|
5922
6043
|
{
|
|
5923
6044
|
"model_format": "pytorch",
|
|
5924
6045
|
"model_size_in_billions": 7,
|
|
@@ -5930,6 +6051,30 @@
|
|
|
5930
6051
|
"model_id": "qwen/Qwen2.5-Coder-7B",
|
|
5931
6052
|
"model_revision": "master",
|
|
5932
6053
|
"model_hub": "modelscope"
|
|
6054
|
+
},
|
|
6055
|
+
{
|
|
6056
|
+
"model_format": "pytorch",
|
|
6057
|
+
"model_size_in_billions": 14,
|
|
6058
|
+
"quantizations": [
|
|
6059
|
+
"4-bit",
|
|
6060
|
+
"8-bit",
|
|
6061
|
+
"none"
|
|
6062
|
+
],
|
|
6063
|
+
"model_id": "qwen/Qwen2.5-Coder-14B",
|
|
6064
|
+
"model_revision": "master",
|
|
6065
|
+
"model_hub": "modelscope"
|
|
6066
|
+
},
|
|
6067
|
+
{
|
|
6068
|
+
"model_format": "pytorch",
|
|
6069
|
+
"model_size_in_billions": 32,
|
|
6070
|
+
"quantizations": [
|
|
6071
|
+
"4-bit",
|
|
6072
|
+
"8-bit",
|
|
6073
|
+
"none"
|
|
6074
|
+
],
|
|
6075
|
+
"model_id": "qwen/Qwen2.5-Coder-32B",
|
|
6076
|
+
"model_revision": "master",
|
|
6077
|
+
"model_hub": "modelscope"
|
|
5933
6078
|
}
|
|
5934
6079
|
]
|
|
5935
6080
|
},
|
|
@@ -5947,6 +6092,18 @@
|
|
|
5947
6092
|
],
|
|
5948
6093
|
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
|
|
5949
6094
|
"model_specs": [
|
|
6095
|
+
{
|
|
6096
|
+
"model_format": "pytorch",
|
|
6097
|
+
"model_size_in_billions": "0_5",
|
|
6098
|
+
"quantizations": [
|
|
6099
|
+
"4-bit",
|
|
6100
|
+
"8-bit",
|
|
6101
|
+
"none"
|
|
6102
|
+
],
|
|
6103
|
+
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct",
|
|
6104
|
+
"model_revision": "master",
|
|
6105
|
+
"model_hub": "modelscope"
|
|
6106
|
+
},
|
|
5950
6107
|
{
|
|
5951
6108
|
"model_format": "pytorch",
|
|
5952
6109
|
"model_size_in_billions": "1_5",
|
|
@@ -5958,6 +6115,17 @@
|
|
|
5958
6115
|
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
|
|
5959
6116
|
"model_revision": "master",
|
|
5960
6117
|
"model_hub": "modelscope"
|
|
6118
|
+
}, {
|
|
6119
|
+
"model_format": "pytorch",
|
|
6120
|
+
"model_size_in_billions": "3",
|
|
6121
|
+
"quantizations": [
|
|
6122
|
+
"4-bit",
|
|
6123
|
+
"8-bit",
|
|
6124
|
+
"none"
|
|
6125
|
+
],
|
|
6126
|
+
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct",
|
|
6127
|
+
"model_revision": "master",
|
|
6128
|
+
"model_hub": "modelscope"
|
|
5961
6129
|
},
|
|
5962
6130
|
{
|
|
5963
6131
|
"model_format": "pytorch",
|
|
@@ -5971,90 +6139,875 @@
|
|
|
5971
6139
|
"model_revision": "master",
|
|
5972
6140
|
"model_hub": "modelscope"
|
|
5973
6141
|
},
|
|
6142
|
+
{
|
|
6143
|
+
"model_format": "pytorch",
|
|
6144
|
+
"model_size_in_billions": 14,
|
|
6145
|
+
"quantizations": [
|
|
6146
|
+
"4-bit",
|
|
6147
|
+
"8-bit",
|
|
6148
|
+
"none"
|
|
6149
|
+
],
|
|
6150
|
+
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct",
|
|
6151
|
+
"model_revision": "master",
|
|
6152
|
+
"model_hub": "modelscope"
|
|
6153
|
+
},
|
|
6154
|
+
{
|
|
6155
|
+
"model_format": "pytorch",
|
|
6156
|
+
"model_size_in_billions": 32,
|
|
6157
|
+
"quantizations": [
|
|
6158
|
+
"4-bit",
|
|
6159
|
+
"8-bit",
|
|
6160
|
+
"none"
|
|
6161
|
+
],
|
|
6162
|
+
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct",
|
|
6163
|
+
"model_revision": "master",
|
|
6164
|
+
"model_hub": "modelscope"
|
|
6165
|
+
},
|
|
5974
6166
|
{
|
|
5975
6167
|
"model_format": "gptq",
|
|
5976
|
-
"model_size_in_billions":
|
|
6168
|
+
"model_size_in_billions": "0_5",
|
|
5977
6169
|
"quantizations": [
|
|
5978
6170
|
"Int4",
|
|
5979
6171
|
"Int8"
|
|
5980
6172
|
],
|
|
5981
|
-
"model_id": "qwen/Qwen2.5-Coder-
|
|
6173
|
+
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}",
|
|
5982
6174
|
"model_revision": "master",
|
|
5983
6175
|
"model_hub": "modelscope"
|
|
5984
6176
|
},
|
|
5985
6177
|
{
|
|
5986
|
-
"model_format": "
|
|
6178
|
+
"model_format": "gptq",
|
|
5987
6179
|
"model_size_in_billions": "1_5",
|
|
5988
6180
|
"quantizations": [
|
|
5989
|
-
"
|
|
5990
|
-
"
|
|
5991
|
-
"q4_0",
|
|
5992
|
-
"q4_k_m",
|
|
5993
|
-
"q5_0",
|
|
5994
|
-
"q5_k_m",
|
|
5995
|
-
"q6_k",
|
|
5996
|
-
"q8_0"
|
|
6181
|
+
"Int4",
|
|
6182
|
+
"Int8"
|
|
5997
6183
|
],
|
|
5998
|
-
"
|
|
5999
|
-
"
|
|
6000
|
-
"
|
|
6184
|
+
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}",
|
|
6185
|
+
"model_revision": "master",
|
|
6186
|
+
"model_hub": "modelscope"
|
|
6001
6187
|
},
|
|
6002
6188
|
{
|
|
6003
|
-
"model_format": "
|
|
6189
|
+
"model_format": "gptq",
|
|
6190
|
+
"model_size_in_billions": 3,
|
|
6191
|
+
"quantizations": [
|
|
6192
|
+
"Int4",
|
|
6193
|
+
"Int8"
|
|
6194
|
+
],
|
|
6195
|
+
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}",
|
|
6196
|
+
"model_revision": "master",
|
|
6197
|
+
"model_hub": "modelscope"
|
|
6198
|
+
},
|
|
6199
|
+
{
|
|
6200
|
+
"model_format": "gptq",
|
|
6004
6201
|
"model_size_in_billions": 7,
|
|
6005
6202
|
"quantizations": [
|
|
6006
|
-
"
|
|
6007
|
-
"
|
|
6008
|
-
"q4_0",
|
|
6009
|
-
"q4_k_m",
|
|
6010
|
-
"q5_0",
|
|
6011
|
-
"q5_k_m",
|
|
6012
|
-
"q6_k",
|
|
6013
|
-
"q8_0"
|
|
6203
|
+
"Int4",
|
|
6204
|
+
"Int8"
|
|
6014
6205
|
],
|
|
6015
|
-
"
|
|
6016
|
-
"
|
|
6017
|
-
"
|
|
6018
|
-
|
|
6019
|
-
|
|
6020
|
-
|
|
6021
|
-
|
|
6022
|
-
|
|
6023
|
-
|
|
6024
|
-
"
|
|
6025
|
-
|
|
6026
|
-
|
|
6027
|
-
|
|
6028
|
-
|
|
6029
|
-
|
|
6030
|
-
|
|
6031
|
-
|
|
6032
|
-
|
|
6033
|
-
|
|
6034
|
-
|
|
6035
|
-
|
|
6036
|
-
|
|
6037
|
-
|
|
6038
|
-
|
|
6039
|
-
|
|
6040
|
-
|
|
6041
|
-
|
|
6042
|
-
|
|
6043
|
-
|
|
6044
|
-
|
|
6045
|
-
|
|
6046
|
-
|
|
6047
|
-
|
|
6048
|
-
|
|
6049
|
-
|
|
6050
|
-
|
|
6051
|
-
|
|
6052
|
-
|
|
6206
|
+
"model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}",
|
|
6207
|
+
"model_revision": "master",
|
|
6208
|
+
"model_hub": "modelscope"
|
|
6209
|
+
},
|
|
6210
|
+
{
|
|
6211
|
+
"model_format": "gptq",
|
|
6212
|
+
"model_size_in_billions": 14,
|
|
6213
|
+
"quantizations": [
|
|
6214
|
+
"Int4",
|
|
6215
|
+
"Int8"
|
|
6216
|
+
],
|
|
6217
|
+
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}",
|
|
6218
|
+
"model_revision": "master",
|
|
6219
|
+
"model_hub": "modelscope"
|
|
6220
|
+
},
|
|
6221
|
+
{
|
|
6222
|
+
"model_format": "gptq",
|
|
6223
|
+
"model_size_in_billions": 32,
|
|
6224
|
+
"quantizations": [
|
|
6225
|
+
"Int4",
|
|
6226
|
+
"Int8"
|
|
6227
|
+
],
|
|
6228
|
+
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}",
|
|
6229
|
+
"model_revision": "master",
|
|
6230
|
+
"model_hub": "modelscope"
|
|
6231
|
+
},
|
|
6232
|
+
{
|
|
6233
|
+
"model_format": "awq",
|
|
6234
|
+
"model_size_in_billions": "0_5",
|
|
6235
|
+
"quantizations": [
|
|
6236
|
+
"Int4"
|
|
6237
|
+
],
|
|
6238
|
+
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ",
|
|
6239
|
+
"model_revision": "master",
|
|
6240
|
+
"model_hub": "modelscope"
|
|
6241
|
+
},
|
|
6242
|
+
{
|
|
6243
|
+
"model_format": "awq",
|
|
6244
|
+
"model_size_in_billions": "1_5",
|
|
6245
|
+
"quantizations": [
|
|
6246
|
+
"Int4"
|
|
6247
|
+
],
|
|
6248
|
+
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ",
|
|
6249
|
+
"model_revision": "master",
|
|
6250
|
+
"model_hub": "modelscope"
|
|
6251
|
+
},
|
|
6252
|
+
{
|
|
6253
|
+
"model_format": "awq",
|
|
6254
|
+
"model_size_in_billions": 3,
|
|
6255
|
+
"quantizations": [
|
|
6256
|
+
"Int4"
|
|
6257
|
+
],
|
|
6258
|
+
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct-AWQ",
|
|
6259
|
+
"model_revision": "master",
|
|
6260
|
+
"model_hub": "modelscope"
|
|
6261
|
+
},
|
|
6262
|
+
{
|
|
6263
|
+
"model_format": "awq",
|
|
6264
|
+
"model_size_in_billions": 7,
|
|
6265
|
+
"quantizations": [
|
|
6266
|
+
"Int4"
|
|
6267
|
+
],
|
|
6268
|
+
"model_id": "qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
|
6269
|
+
"model_revision": "master",
|
|
6270
|
+
"model_hub": "modelscope"
|
|
6271
|
+
},
|
|
6272
|
+
{
|
|
6273
|
+
"model_format": "awq",
|
|
6274
|
+
"model_size_in_billions": 14,
|
|
6275
|
+
"quantizations": [
|
|
6276
|
+
"Int4"
|
|
6277
|
+
],
|
|
6278
|
+
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct-AWQ",
|
|
6279
|
+
"model_revision": "master",
|
|
6280
|
+
"model_hub": "modelscope"
|
|
6281
|
+
},
|
|
6282
|
+
{
|
|
6283
|
+
"model_format": "awq",
|
|
6284
|
+
"model_size_in_billions": 32,
|
|
6285
|
+
"quantizations": [
|
|
6286
|
+
"Int4"
|
|
6287
|
+
],
|
|
6288
|
+
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
|
|
6289
|
+
"model_revision": "master",
|
|
6290
|
+
"model_hub": "modelscope"
|
|
6291
|
+
},
|
|
6292
|
+
|
|
6293
|
+
{
|
|
6294
|
+
"model_format": "ggufv2",
|
|
6295
|
+
"model_size_in_billions": "1_5",
|
|
6296
|
+
"quantizations": [
|
|
6297
|
+
"q2_k",
|
|
6298
|
+
"q3_k_m",
|
|
6299
|
+
"q4_0",
|
|
6300
|
+
"q4_k_m",
|
|
6301
|
+
"q5_0",
|
|
6302
|
+
"q5_k_m",
|
|
6303
|
+
"q6_k",
|
|
6304
|
+
"q8_0"
|
|
6305
|
+
],
|
|
6306
|
+
"model_hub": "modelscope",
|
|
6307
|
+
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
|
|
6308
|
+
"model_file_name_template": "qwen2.5-coder-1.5b-instruct-{quantization}.gguf"
|
|
6309
|
+
},
|
|
6310
|
+
{
|
|
6311
|
+
"model_format": "ggufv2",
|
|
6312
|
+
"model_size_in_billions": 7,
|
|
6313
|
+
"quantizations": [
|
|
6314
|
+
"q2_k",
|
|
6315
|
+
"q3_k_m",
|
|
6316
|
+
"q4_0",
|
|
6317
|
+
"q4_k_m",
|
|
6318
|
+
"q5_0",
|
|
6319
|
+
"q5_k_m",
|
|
6320
|
+
"q6_k",
|
|
6321
|
+
"q8_0"
|
|
6322
|
+
],
|
|
6323
|
+
"model_hub": "modelscope",
|
|
6324
|
+
"model_id": "qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
|
|
6325
|
+
"model_file_name_template": "qwen2.5-coder-7b-instruct-{quantization}.gguf",
|
|
6326
|
+
"model_file_name_split_template": "qwen2.5-coder-7b-instruct-{quantization}-{part}.gguf",
|
|
6327
|
+
"quantization_parts": {
|
|
6328
|
+
"q4_0": [
|
|
6329
|
+
"00001-of-00002",
|
|
6330
|
+
"00002-of-00002"
|
|
6331
|
+
],
|
|
6332
|
+
"q4_k_m": [
|
|
6333
|
+
"00001-of-00002",
|
|
6334
|
+
"00002-of-00002"
|
|
6335
|
+
],
|
|
6336
|
+
"q5_0": [
|
|
6337
|
+
"00001-of-00002",
|
|
6338
|
+
"00002-of-00002"
|
|
6339
|
+
],
|
|
6340
|
+
"q5_k_m": [
|
|
6341
|
+
"00001-of-00002",
|
|
6342
|
+
"00002-of-00002"
|
|
6343
|
+
],
|
|
6344
|
+
"q6_k": [
|
|
6345
|
+
"00001-of-00002",
|
|
6346
|
+
"00002-of-00002"
|
|
6347
|
+
],
|
|
6348
|
+
"q8_0": [
|
|
6349
|
+
"00001-of-00003",
|
|
6350
|
+
"00002-of-00003",
|
|
6351
|
+
"00003-of-00003"
|
|
6352
|
+
]
|
|
6353
|
+
}
|
|
6354
|
+
}
|
|
6355
|
+
],
|
|
6356
|
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
|
6357
|
+
"stop_token_ids": [
|
|
6358
|
+
151643,
|
|
6359
|
+
151644,
|
|
6360
|
+
151645
|
|
6361
|
+
],
|
|
6362
|
+
"stop": [
|
|
6363
|
+
"<|endoftext|>",
|
|
6364
|
+
"<|im_start|>",
|
|
6365
|
+
"<|im_end|>"
|
|
6366
|
+
]
|
|
6367
|
+
},
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QwQ-32B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QwQ-32B-Preview",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4bit"
+                ],
+                "model_id": "okwinds/QwQ-32B-Preview-MLX-4bit",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "8bit"
+                ],
+                "model_id": "okwinds/QwQ-32B-Preview-MLX-8bit",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "Q3_K_L",
+                    "Q4_K_M",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "AI-ModelScope/QwQ-32B-Preview-GGUF",
+                "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|im_start|>",
+            "<|im_end|>"
+        ]
+    },
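Each spec in the QwQ-32B-Preview entry is addressable at launch time by model name, format, and quantization. A sketch of launching the ggufv2 build through xinference's RESTful client; the endpoint URL is a placeholder, and the `model_engine` value is an assumption (recent releases ask for an engine, and llama.cpp is the usual one for ggufv2):

```python
from xinference.client import Client

client = Client("http://127.0.0.1:9997")  # placeholder endpoint

# Select the ggufv2 spec registered above at its Q4_K_M quantization.
model_uid = client.launch_model(
    model_name="QwQ-32B-Preview",
    model_engine="llama.cpp",  # assumed engine for the ggufv2 format
    model_format="ggufv2",
    quantization="Q4_K_M",
)
model = client.get_model(model_uid)
print(model.chat(messages=[{"role": "user", "content": "Why is the sky blue?"}]))
```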
+    {
+        "version": 1,
+        "context_length": 131072,
+        "model_name": "deepseek-r1-distill-qwen",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Qwen-1.5B-{quantization}.gguf",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "tclf90/deepseek-r1-distill-qwen-7b-gptq-int4",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "F16"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Qwen-7B-{quantization}.gguf",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit"
+                ],
+                "model_id": "okwinds/DeepSeek-R1-Distill-Qwen-7B-MLX-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "F16"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Qwen-14B-{quantization}.gguf",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 14,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit"
+                ],
+                "model_id": "okwinds/DeepSeek-R1-Distill-Qwen-14B-MLX-{quantization}",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "gptq",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "Int4"
+                ],
+                "model_id": "tclf90/deepseek-r1-distill-qwen-32b-gptq-int4",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "Q2_K",
+                    "Q2_K_L",
+                    "Q3_K_M",
+                    "Q4_K_M",
+                    "Q5_K_M",
+                    "Q6_K",
+                    "Q8_0",
+                    "F16"
+                ],
+                "model_id": "unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF",
+                "model_file_name_template": "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 32,
+                "quantizations": [
+                    "2bit",
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit"
+                ],
+                "model_id": "okwinds/DeepSeek-R1-Distill-Qwen-32B-MLX-{quantization}",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+        "stop_token_ids": [
+            151643
+        ],
+        "stop": [
+            "<|end▁of▁sentence|>"
+        ]
+    },
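The templated fields in the deepseek-r1-distill-qwen specs (`model_file_name_template` and the mlx `model_id`) are plain `{quantization}` placeholders; reading them as `str.format` patterns shows what a chosen quantization resolves to (the substitution mechanism inside xinference may differ in detail):

```python
# Expanding the templated fields above for a concrete quantization choice.
gguf_template = "DeepSeek-R1-Distill-Qwen-32B-{quantization}.gguf"
mlx_id_template = "okwinds/DeepSeek-R1-Distill-Qwen-32B-MLX-{quantization}"

print(gguf_template.format(quantization="Q4_K_M"))
# -> DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
print(mlx_id_template.format(quantization="4bit"))
# -> okwinds/DeepSeek-R1-Distill-Qwen-32B-MLX-4bit
```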
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "glm-edge-chat",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat"
+        ],
+        "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/glm-edge-1.5b-chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "4",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/glm-edge-4b-chat",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "ggml-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-1.5b-chat-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "1_5",
+                "quantizations": [
+                    "F16"
+                ],
+                "model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-1.5b-chat-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "4",
+                "quantizations": [
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "ggml-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-4b-chat-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "4",
+                "quantizations": [
+                    "F16"
+                ],
+                "model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-4b-chat-gguf"
+            }
+        ],
+        "chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+        "stop_token_ids": [
+            59246,
+            59253,
+            59255
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
+    },
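The `chat_template` strings in these entries are ordinary Jinja2 templates over a `messages` list plus an `add_generation_prompt` flag. Rendering the glm-edge-chat template verbatim shows the prompt text the server builds:

```python
from jinja2 import Template

# The glm-edge-chat template from the entry above, rendered directly.
tpl = Template(
    "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n"
    "{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n"
    "{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n"
    "{{ item['content'] }}{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
)
print(tpl.render(
    messages=[{"role": "user", "content": "What is GLM-Edge?"}],
    add_generation_prompt=True,
))
# <|user|>
# What is GLM-Edge?<|assistant|>
```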
+    {
+        "version": 1,
+        "context_length": 8192,
+        "model_name": "glm-edge-v",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "2",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/glm-edge-v-2b",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "5",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/glm-edge-v-5b",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "2",
+                "quantizations": [
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "ggml-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "2",
+                "quantizations": [
+                    "F16"
+                ],
+                "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "2",
+                "quantizations": [
+                    "f16"
+                ],
+                "model_file_name_template": "mmproj-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-2b-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "5",
+                "quantizations": [
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "ggml-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "5",
+                "quantizations": [
+                    "F16"
+                ],
+                "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": "5",
+                "quantizations": [
+                    "f16"
+                ],
+                "model_file_name_template": "mmproj-model-{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "ZhipuAI/glm-edge-v-5b-gguf"
+            }
+        ],
+        "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+        "stop_token_ids": [
+            59246,
+            59253,
+            59255
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
+    },
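Unlike the text-only entries, the glm-edge-v template iterates over structured content parts and emits 578 `<|begin_of_image|>` placeholders for each image part. The message shape the template expects therefore looks like this (field values are illustrative; the actual image bytes travel separately to the vision encoder):

```python
# Message shape implied by the glm-edge-v chat_template above:
# "content" is a list of typed parts rather than a plain string.
message = {
    "role": "user",
    "content": [
        {"type": "image"},  # rendered as 578 <|begin_of_image|> tokens
        {"type": "text", "text": "What is shown in this picture?"},
    ],
}
```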
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "QvQ-72B-Preview",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "QVQ-72B-Preview is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "Qwen/QVQ-72B-Preview",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "mlx",
+                "model_size_in_billions": 72,
+                "quantizations": [
+                    "3bit",
+                    "4bit",
+                    "6bit",
+                    "8bit",
+                    "bf16"
+                ],
+                "model_id": "mlx-community/QVQ-72B-Preview-{quantization}",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
+    },
+    {
+        "version": 1,
+        "context_length": 32768,
+        "model_name": "marco-o1",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "tools"
+        ],
+        "model_description": "Marco-o1: Towards Open Reasoning Models for Open-Ended Solutions",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "AIDC-AI/Marco-o1",
+                "model_hub": "modelscope"
+            },
+            {
+                "model_format": "ggufv2",
+                "model_size_in_billions": 7,
+                "quantizations": [
+                    "Q2_K",
+                    "Q3_K_L",
+                    "Q3_K_M",
+                    "Q3_K_S",
+                    "Q4_0",
+                    "Q4_1",
+                    "Q4_K_M",
+                    "Q4_K_S",
+                    "Q5_0",
+                    "Q5_1",
+                    "Q5_K_M",
+                    "Q5_K_S",
+                    "Q6_K",
+                    "Q8_0"
+                ],
+                "model_file_name_template": "Marco-o1.{quantization}.gguf",
+                "model_hub": "modelscope",
+                "model_id": "QuantFactory/Marco-o1-GGUF"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在<Thought>内完成,<Output>内输出你的结果。\n<Thought>应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,<Output>内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151643,
+            151644,
+            151645
         ],
         "stop": [
             "<|endoftext|>",
             "<|im_start|>",
             "<|im_end|>"
         ]
+    },
+    {
+        "version": 1,
+        "context_length": 4096,
+        "model_name": "cogagent",
+        "model_lang": [
+            "en",
+            "zh"
+        ],
+        "model_ability": [
+            "chat",
+            "vision"
+        ],
+        "model_description": "The CogAgent-9B-20241220 model is based on GLM-4V-9B, a bilingual open-source VLM base model. Through data collection and optimization, multi-stage training, and strategy improvements, CogAgent-9B-20241220 achieves significant advancements in GUI perception, inference prediction accuracy, action space completeness, and task generalizability.",
+        "model_specs": [
+            {
+                "model_format": "pytorch",
+                "model_size_in_billions": "9",
+                "quantizations": [
+                    "4-bit",
+                    "8-bit",
+                    "none"
+                ],
+                "model_id": "ZhipuAI/cogagent-9b-20241220",
+                "model_hub": "modelscope"
+            }
+        ],
+        "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+        "stop_token_ids": [
+            151329,
+            151336,
+            151338
+        ],
+        "stop": [
+            "<|endoftext|>",
+            "<|user|>",
+            "<|observation|>"
+        ]
     }
 ]
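Taken together, every family added in this hunk carries the same top-level fields (`version`, `context_length`, `model_name`, `model_lang`, `model_ability`, `model_specs`, plus `chat_template` and stop settings), and every spec names a format, size, quantizations, and hub. A quick, purely illustrative sanity check one might run over the merged file:

```python
import json

# Field names taken from the entries above; the checker itself is a sketch,
# not part of xinference.
REQUIRED_FAMILY = {"version", "context_length", "model_name",
                   "model_lang", "model_ability", "model_specs"}
REQUIRED_SPEC = {"model_format", "model_size_in_billions",
                 "quantizations", "model_hub"}

with open("xinference/model/llm/llm_family_modelscope.json") as f:
    families = json.load(f)

for family in families:
    missing = REQUIRED_FAMILY - family.keys()
    assert not missing, f"{family.get('model_name')}: missing {missing}"
    for spec in family["model_specs"]:
        assert REQUIRED_SPEC <= spec.keys(), spec
print(f"validated {len(families)} model families")
```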