xinference 1.2.2__py3-none-any.whl → 1.3.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (68) hide show
  1. xinference/_version.py +3 -3
  2. xinference/client/restful/restful_client.py +9 -1
  3. xinference/core/model.py +19 -0
  4. xinference/core/resource.py +7 -1
  5. xinference/core/status_guard.py +1 -0
  6. xinference/core/supervisor.py +228 -19
  7. xinference/core/utils.py +1 -29
  8. xinference/core/worker.py +28 -2
  9. xinference/deploy/cmdline.py +33 -3
  10. xinference/deploy/test/test_cmdline.py +32 -0
  11. xinference/device_utils.py +43 -1
  12. xinference/model/audio/kokoro.py +19 -36
  13. xinference/model/audio/model_spec.json +1 -1
  14. xinference/model/image/stable_diffusion/core.py +15 -6
  15. xinference/model/llm/llm_family.json +521 -6
  16. xinference/model/llm/llm_family.py +3 -1
  17. xinference/model/llm/llm_family_modelscope.json +559 -6
  18. xinference/model/llm/reasoning_parsers/__init__.py +13 -0
  19. xinference/model/llm/reasoning_parsers/abs_reasoning_parsers.py +98 -0
  20. xinference/model/llm/reasoning_parsers/deepseek_r1_reasoning_parser.py +140 -0
  21. xinference/model/llm/sglang/core.py +99 -11
  22. xinference/model/llm/transformers/intern_vl.py +23 -14
  23. xinference/model/llm/utils.py +55 -18
  24. xinference/model/llm/vllm/core.py +23 -2
  25. xinference/model/llm/vllm/xavier/executor.py +2 -2
  26. xinference/model/llm/vllm/xavier/scheduler.py +3 -3
  27. xinference/thirdparty/internvl/conversation.py +26 -17
  28. xinference/types.py +2 -0
  29. xinference/web/ui/build/asset-manifest.json +6 -6
  30. xinference/web/ui/build/index.html +1 -1
  31. xinference/web/ui/build/static/css/main.f8177338.css +2 -0
  32. xinference/web/ui/build/static/css/main.f8177338.css.map +1 -0
  33. xinference/web/ui/build/static/js/main.ad42919c.js +3 -0
  34. xinference/web/ui/build/static/js/main.ad42919c.js.map +1 -0
  35. xinference/web/ui/node_modules/.cache/babel-loader/074a42304bbbaa79e1bfc3b28502457a390df55708de9006f4cc8e35c60aea87.json +1 -0
  36. xinference/web/ui/node_modules/.cache/babel-loader/0acb065326560592b10888234242f94f67efe28458b90f273d4d4fba9daa0cd2.json +1 -0
  37. xinference/web/ui/node_modules/.cache/babel-loader/279ace390216236a82b3d8995c78eca4d637ac9a523e9f521a2d9c76607a43d7.json +1 -0
  38. xinference/web/ui/node_modules/.cache/babel-loader/630a7bd592596cc6e291fc32238ce7c08238038a64ed8ccee0eb0c13c9902910.json +1 -0
  39. xinference/web/ui/node_modules/.cache/babel-loader/6cb9f6c62ab4042f0b11c5d75e51187188e9d6f5f08b1d63e796e051bafdb457.json +1 -0
  40. xinference/web/ui/node_modules/.cache/babel-loader/8f9af2979e45d4648f0cfae108363e58ee421c29a9d4e7329b6f06d9adfd4133.json +1 -0
  41. xinference/web/ui/node_modules/.cache/babel-loader/914c33e91c1012e3bcd3e96f3a25884cbef148290632d0266dab972b8cc1e95f.json +1 -0
  42. xinference/web/ui/node_modules/.cache/babel-loader/9c8b1a86e7c65b2b2599a205e30920652d6c2105f926508ef5bcf29a3ef4ce76.json +1 -0
  43. xinference/web/ui/node_modules/.cache/babel-loader/b7939cd3a48adf12fccfdd0803019b5cc235ff7de3a297dae70ce635e0eea13e.json +1 -0
  44. xinference/web/ui/node_modules/.cache/babel-loader/efe7cd132c27a8f9fd5352a394c491fd5fb0da0348cf9fcbd923164a32365eab.json +1 -0
  45. xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +1 -0
  46. xinference/web/ui/node_modules/.cache/babel-loader/fecf076bcd198a458c2a6ab0e85e40dc1c99994c353164e79c469be162cb74c9.json +1 -0
  47. xinference/web/ui/src/locales/en.json +14 -1
  48. xinference/web/ui/src/locales/zh.json +14 -1
  49. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/METADATA +11 -11
  50. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/RECORD +55 -49
  51. xinference/web/ui/build/static/css/main.51a587ff.css +0 -2
  52. xinference/web/ui/build/static/css/main.51a587ff.css.map +0 -1
  53. xinference/web/ui/build/static/js/main.b0936c54.js +0 -3
  54. xinference/web/ui/build/static/js/main.b0936c54.js.map +0 -1
  55. xinference/web/ui/node_modules/.cache/babel-loader/0c2fb5375667931c4a331c99e0d87dc145e8f327cea3f44d6e56f54c7c1d4020.json +0 -1
  56. xinference/web/ui/node_modules/.cache/babel-loader/185ceb8872d562e032b47e79df6a45670e06345b8ed70aad1a131e0476783c5c.json +0 -1
  57. xinference/web/ui/node_modules/.cache/babel-loader/3eefb411b24c2b3ce053570ef50daccf154022f0e168be5ed0fec21394baf9f4.json +0 -1
  58. xinference/web/ui/node_modules/.cache/babel-loader/63c8e07687ea53a4f8a910ee5e42e0eb26cd1acbfbe820f3e3248a786ee51401.json +0 -1
  59. xinference/web/ui/node_modules/.cache/babel-loader/a3ff866acddf34917a7ee399e0e571a4dfd8ba66d5057db885f243e16a6eb17d.json +0 -1
  60. xinference/web/ui/node_modules/.cache/babel-loader/a7f1a71f6580dfe810c685a9c1d68e318f71e1fa258fbe50b87a6ac37cc0a598.json +0 -1
  61. xinference/web/ui/node_modules/.cache/babel-loader/bdee44abeadc4abc17d41c52eb49c6e19a4b1a267b6e16876ce91bdeeebfc52d.json +0 -1
  62. xinference/web/ui/node_modules/.cache/babel-loader/d7664d18c4ddbad9c3a6a31b91f7c00fb0dde804608674a9860ee50f33e54708.json +0 -1
  63. xinference/web/ui/node_modules/.cache/babel-loader/ed57202cb79649bb716400436590245547df241988fc7c8e1d85d132299542d2.json +0 -1
  64. /xinference/web/ui/build/static/js/{main.b0936c54.js.LICENSE.txt → main.ad42919c.js.LICENSE.txt} +0 -0
  65. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/LICENSE +0 -0
  66. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/WHEEL +0 -0
  67. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/entry_points.txt +0 -0
  68. {xinference-1.2.2.dist-info → xinference-1.3.0.post1.dist-info}/top_level.txt +0 -0
@@ -6772,6 +6772,151 @@
6772
6772
  "stop_token_ids": [],
6773
6773
  "stop": []
6774
6774
  },
6775
+ {
6776
+ "version": 1,
6777
+ "context_length": 16384,
6778
+ "model_name": "InternVL2.5",
6779
+ "model_lang": [
6780
+ "en",
6781
+ "zh"
6782
+ ],
6783
+ "model_ability": [
6784
+ "chat",
6785
+ "vision"
6786
+ ],
6787
+ "model_description": "InternVL 2.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6788
+ "model_specs": [
6789
+ {
6790
+ "model_format": "pytorch",
6791
+ "model_size_in_billions": 1,
6792
+ "quantizations": [
6793
+ "4-bit",
6794
+ "8-bit",
6795
+ "none"
6796
+ ],
6797
+ "model_id": "OpenGVLab/InternVL2_5-1B"
6798
+ },
6799
+ {
6800
+ "model_format": "awq",
6801
+ "model_size_in_billions": 1,
6802
+ "quantizations": [
6803
+ "Int4"
6804
+ ],
6805
+ "model_id": "OpenGVLab/InternVL2_5-1B-AWQ"
6806
+ },
6807
+ {
6808
+ "model_format": "pytorch",
6809
+ "model_size_in_billions": 2,
6810
+ "quantizations": [
6811
+ "4-bit",
6812
+ "8-bit",
6813
+ "none"
6814
+ ],
6815
+ "model_id": "OpenGVLab/InternVL2_5-2B"
6816
+ },
6817
+ {
6818
+ "model_format": "awq",
6819
+ "model_size_in_billions": 2,
6820
+ "quantizations": [
6821
+ "Int4"
6822
+ ],
6823
+ "model_id": "OpenGVLab/InternVL2_5-2B-AWQ"
6824
+ },
6825
+ {
6826
+ "model_format": "pytorch",
6827
+ "model_size_in_billions": 4,
6828
+ "quantizations": [
6829
+ "4-bit",
6830
+ "8-bit",
6831
+ "none"
6832
+ ],
6833
+ "model_id": "OpenGVLab/InternVL2_5-4B"
6834
+ },
6835
+ {
6836
+ "model_format": "awq",
6837
+ "model_size_in_billions": 4,
6838
+ "quantizations": [
6839
+ "Int4"
6840
+ ],
6841
+ "model_id": "OpenGVLab/InternVL2_5-4B-AWQ"
6842
+ },
6843
+ {
6844
+ "model_format": "pytorch",
6845
+ "model_size_in_billions": 8,
6846
+ "quantizations": [
6847
+ "4-bit",
6848
+ "8-bit",
6849
+ "none"
6850
+ ],
6851
+ "model_id": "OpenGVLab/InternVL2_5-8B"
6852
+ },
6853
+ {
6854
+ "model_format": "awq",
6855
+ "model_size_in_billions": 8,
6856
+ "quantizations": [
6857
+ "Int4"
6858
+ ],
6859
+ "model_id": "OpenGVLab/InternVL2_5-8B-AWQ"
6860
+ },
6861
+ {
6862
+ "model_format": "pytorch",
6863
+ "model_size_in_billions": 26,
6864
+ "quantizations": [
6865
+ "4-bit",
6866
+ "8-bit",
6867
+ "none"
6868
+ ],
6869
+ "model_id": "OpenGVLab/InternVL2_5-26B"
6870
+ },
6871
+ {
6872
+ "model_format": "awq",
6873
+ "model_size_in_billions": 26,
6874
+ "quantizations": [
6875
+ "Int4"
6876
+ ],
6877
+ "model_id": "OpenGVLab/InternVL2_5-26B-AWQ"
6878
+ },
6879
+ {
6880
+ "model_format": "pytorch",
6881
+ "model_size_in_billions": 38,
6882
+ "quantizations": [
6883
+ "4-bit",
6884
+ "8-bit",
6885
+ "none"
6886
+ ],
6887
+ "model_id": "OpenGVLab/InternVL2_5-38B"
6888
+ },
6889
+ {
6890
+ "model_format": "awq",
6891
+ "model_size_in_billions": 38,
6892
+ "quantizations": [
6893
+ "Int4"
6894
+ ],
6895
+ "model_id": "OpenGVLab/InternVL2_5-38B-AWQ"
6896
+ },
6897
+ {
6898
+ "model_format": "pytorch",
6899
+ "model_size_in_billions": 78,
6900
+ "quantizations": [
6901
+ "4-bit",
6902
+ "8-bit",
6903
+ "none"
6904
+ ],
6905
+ "model_id": "OpenGVLab/InternVL2_5-78B"
6906
+ },
6907
+ {
6908
+ "model_format": "awq",
6909
+ "model_size_in_billions": 78,
6910
+ "quantizations": [
6911
+ "Int4"
6912
+ ],
6913
+ "model_id": "OpenGVLab/InternVL2_5-78B-AWQ"
6914
+ }
6915
+ ],
6916
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
6917
+ "stop_token_ids": [],
6918
+ "stop": []
6919
+ },
6775
6920
  {
6776
6921
  "version": 1,
6777
6922
  "context_length": 8192,
@@ -7472,6 +7617,370 @@
7472
7617
  "<|end▁of▁sentence|>"
7473
7618
  ]
7474
7619
  },
7620
+ {
7621
+ "version": 1,
7622
+ "context_length": 163840,
7623
+ "model_name": "deepseek-v3",
7624
+ "model_lang": [
7625
+ "en",
7626
+ "zh"
7627
+ ],
7628
+ "model_ability": [
7629
+ "chat"
7630
+ ],
7631
+ "model_description": "DeepSeek-V3, a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. ",
7632
+ "model_specs": [
7633
+ {
7634
+ "model_format": "pytorch",
7635
+ "model_size_in_billions": 671,
7636
+ "quantizations": [
7637
+ "4-bit",
7638
+ "8-bit",
7639
+ "none"
7640
+ ],
7641
+ "model_id": "deepseek-ai/DeepSeek-V3",
7642
+ "model_revision": "1d044fd82b15f1cedb197a288e50cc96a2c27205"
7643
+ },
7644
+ {
7645
+ "model_format": "awq",
7646
+ "model_size_in_billions": 671,
7647
+ "quantizations": [
7648
+ "Int4"
7649
+ ],
7650
+ "model_id": "cognitivecomputations/DeepSeek-V3-AWQ"
7651
+ },
7652
+ {
7653
+ "model_format": "ggufv2",
7654
+ "model_size_in_billions": 671,
7655
+ "quantizations": [
7656
+ "Q2_K_L",
7657
+ "Q2_K_XS",
7658
+ "Q3_K_M",
7659
+ "Q4_K_M",
7660
+ "Q5_K_M",
7661
+ "Q6_K",
7662
+ "Q8_0"
7663
+ ],
7664
+ "model_id": "unsloth/DeepSeek-V3-GGUF",
7665
+ "model_file_name_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}.gguf",
7666
+ "model_file_name_split_template": "DeepSeek-V3-{quantization}/DeepSeek-V3-{quantization}-{part}.gguf",
7667
+ "quantization_parts": {
7668
+ "Q2_K_L": [
7669
+ "00001-of-00005",
7670
+ "00002-of-00005",
7671
+ "00003-of-00005",
7672
+ "00004-of-00005",
7673
+ "00005-of-00005"
7674
+ ],
7675
+ "Q2_K_XS": [
7676
+ "00001-of-00005",
7677
+ "00002-of-00005",
7678
+ "00003-of-00005",
7679
+ "00004-of-00005",
7680
+ "00005-of-00005"
7681
+ ],
7682
+ "Q3_K_M": [
7683
+ "00001-of-00007",
7684
+ "00002-of-00007",
7685
+ "00003-of-00007",
7686
+ "00004-of-00007",
7687
+ "00005-of-00007",
7688
+ "00006-of-00007",
7689
+ "00007-of-00007"
7690
+ ],
7691
+ "Q4_K_M": [
7692
+ "00001-of-00009",
7693
+ "00002-of-00009",
7694
+ "00003-of-00009",
7695
+ "00004-of-00009",
7696
+ "00005-of-00009",
7697
+ "00006-of-00009",
7698
+ "00007-of-00009",
7699
+ "00008-of-00009",
7700
+ "00009-of-00009"
7701
+ ],
7702
+ "Q5_K_M": [
7703
+ "00001-of-00010",
7704
+ "00002-of-00010",
7705
+ "00003-of-00010",
7706
+ "00004-of-00010",
7707
+ "00005-of-00010",
7708
+ "00006-of-00010",
7709
+ "00007-of-00010",
7710
+ "00008-of-00010",
7711
+ "00009-of-00010",
7712
+ "00010-of-00010"
7713
+ ],
7714
+ "Q6_K": [
7715
+ "00001-of-00012",
7716
+ "00002-of-00012",
7717
+ "00003-of-00012",
7718
+ "00004-of-00012",
7719
+ "00005-of-00012",
7720
+ "00006-of-00012",
7721
+ "00007-of-00012",
7722
+ "00008-of-00012",
7723
+ "00009-of-00012",
7724
+ "00010-of-00012",
7725
+ "00011-of-00012",
7726
+ "00012-of-00012"
7727
+ ],
7728
+ "Q8_0": [
7729
+ "00001-of-00016",
7730
+ "00002-of-00016",
7731
+ "00003-of-00016",
7732
+ "00004-of-00016",
7733
+ "00005-of-00016",
7734
+ "00006-of-00016",
7735
+ "00007-of-00016",
7736
+ "00008-of-00016",
7737
+ "00009-of-00016",
7738
+ "00010-of-00016",
7739
+ "00011-of-00016",
7740
+ "00012-of-00016",
7741
+ "00013-of-00016",
7742
+ "00014-of-00016",
7743
+ "00015-of-00016",
7744
+ "00016-of-00016"
7745
+ ]
7746
+ }
7747
+ },
7748
+ {
7749
+ "model_format": "mlx",
7750
+ "model_size_in_billions": 671,
7751
+ "quantizations": [
7752
+ "3bit",
7753
+ "4bit"
7754
+ ],
7755
+ "model_id": "mlx-community/DeepSeek-V3-{quantization}"
7756
+ }
7757
+ ],
7758
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{'<|Assistant|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
7759
+ "stop_token_ids": [
7760
+ 1
7761
+ ],
7762
+ "stop": [
7763
+ "<|end▁of▁sentence|>"
7764
+ ]
7765
+ },
7766
+ {
7767
+ "version": 1,
7768
+ "context_length": 163840,
7769
+ "model_name": "deepseek-r1",
7770
+ "model_lang": [
7771
+ "en",
7772
+ "zh"
7773
+ ],
7774
+ "model_ability": [
7775
+ "chat",
7776
+ "reasoning"
7777
+ ],
7778
+ "model_description": "DeepSeek-R1, which incorporates cold-start data before RL. DeepSeek-R1 achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks.",
7779
+ "model_specs": [
7780
+ {
7781
+ "model_format": "pytorch",
7782
+ "model_size_in_billions": 671,
7783
+ "quantizations": [
7784
+ "4-bit",
7785
+ "8-bit",
7786
+ "none"
7787
+ ],
7788
+ "model_id": "deepseek-ai/DeepSeek-R1",
7789
+ "model_revision": "8a58a132790c9935686eb97f042afa8013451c9f"
7790
+ },
7791
+ {
7792
+ "model_format": "awq",
7793
+ "model_size_in_billions": 671,
7794
+ "quantizations": [
7795
+ "Int4"
7796
+ ],
7797
+ "model_id": "cognitivecomputations/DeepSeek-R1-AWQ"
7798
+ },
7799
+ {
7800
+ "model_format": "ggufv2",
7801
+ "model_size_in_billions": 671,
7802
+ "quantizations": [
7803
+ "UD-IQ1_S",
7804
+ "UD-IQ1_M",
7805
+ "UD-IQ2_XXS",
7806
+ "UD-Q2_K_XL",
7807
+ "Q2_K",
7808
+ "Q2_K_L",
7809
+ "Q2_K_XS",
7810
+ "Q3_K_M",
7811
+ "Q4_K_M",
7812
+ "Q5_K_M",
7813
+ "Q6_K",
7814
+ "Q8_0",
7815
+ "BF16"
7816
+ ],
7817
+ "model_id": "unsloth/DeepSeek-R1-GGUF",
7818
+ "model_file_name_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}.gguf",
7819
+ "model_file_name_split_template": "DeepSeek-R1-{quantization}/DeepSeek-R1-{quantization}-{part}.gguf",
7820
+ "quantization_parts": {
7821
+ "UD-IQ1_S": [
7822
+ "00001-of-00003",
7823
+ "00002-of-00003",
7824
+ "00003-of-00003"
7825
+ ],
7826
+ "UD-IQ1_M": [
7827
+ "00001-of-00004",
7828
+ "00002-of-00004",
7829
+ "00003-of-00004",
7830
+ "00004-of-00004"
7831
+ ],
7832
+ "UD-IQ2_XXS": [
7833
+ "00001-of-00004",
7834
+ "00002-of-00004",
7835
+ "00003-of-00004",
7836
+ "00004-of-00004"
7837
+ ],
7838
+ "UD-Q2_K_XL": [
7839
+ "00001-of-00005",
7840
+ "00002-of-00005",
7841
+ "00003-of-00005",
7842
+ "00004-of-00005",
7843
+ "00005-of-00005"
7844
+ ],
7845
+ "Q2_K": [
7846
+ "00001-of-00005",
7847
+ "00002-of-00005",
7848
+ "00003-of-00005",
7849
+ "00004-of-00005",
7850
+ "00005-of-00005"
7851
+ ],
7852
+ "Q2_K_L": [
7853
+ "00001-of-00005",
7854
+ "00002-of-00005",
7855
+ "00003-of-00005",
7856
+ "00004-of-00005",
7857
+ "00005-of-00005"
7858
+ ],
7859
+ "Q2_K_XS": [
7860
+ "00001-of-00005",
7861
+ "00002-of-00005",
7862
+ "00003-of-00005",
7863
+ "00004-of-00005",
7864
+ "00005-of-00005"
7865
+ ],
7866
+ "Q3_K_M": [
7867
+ "00001-of-00007",
7868
+ "00002-of-00007",
7869
+ "00003-of-00007",
7870
+ "00004-of-00007",
7871
+ "00005-of-00007",
7872
+ "00006-of-00007",
7873
+ "00007-of-00007"
7874
+ ],
7875
+ "Q4_K_M": [
7876
+ "00001-of-00009",
7877
+ "00002-of-00009",
7878
+ "00003-of-00009",
7879
+ "00004-of-00009",
7880
+ "00005-of-00009",
7881
+ "00006-of-00009",
7882
+ "00007-of-00009",
7883
+ "00008-of-00009",
7884
+ "00009-of-00009"
7885
+ ],
7886
+ "Q5_K_M": [
7887
+ "00001-of-00010",
7888
+ "00002-of-00010",
7889
+ "00003-of-00010",
7890
+ "00004-of-00010",
7891
+ "00005-of-00010",
7892
+ "00006-of-00010",
7893
+ "00007-of-00010",
7894
+ "00008-of-00010",
7895
+ "00009-of-00010",
7896
+ "00010-of-00010"
7897
+ ],
7898
+ "Q6_K": [
7899
+ "00001-of-00012",
7900
+ "00002-of-00012",
7901
+ "00003-of-00012",
7902
+ "00004-of-00012",
7903
+ "00005-of-00012",
7904
+ "00006-of-00012",
7905
+ "00007-of-00012",
7906
+ "00008-of-00012",
7907
+ "00009-of-00012",
7908
+ "00010-of-00012",
7909
+ "00011-of-00012",
7910
+ "00012-of-00012"
7911
+ ],
7912
+ "Q8_0": [
7913
+ "00001-of-00015",
7914
+ "00002-of-00015",
7915
+ "00003-of-00015",
7916
+ "00004-of-00015",
7917
+ "00005-of-00015",
7918
+ "00006-of-00015",
7919
+ "00007-of-00015",
7920
+ "00008-of-00015",
7921
+ "00009-of-00015",
7922
+ "00010-of-00015",
7923
+ "00011-of-00015",
7924
+ "00012-of-00015",
7925
+ "00013-of-00015",
7926
+ "00014-of-00015",
7927
+ "00015-of-00015"
7928
+ ],
7929
+ "BF16": [
7930
+ "00001-of-00030",
7931
+ "00002-of-00030",
7932
+ "00003-of-00030",
7933
+ "00004-of-00030",
7934
+ "00005-of-00030",
7935
+ "00006-of-00030",
7936
+ "00007-of-00030",
7937
+ "00008-of-00030",
7938
+ "00009-of-00030",
7939
+ "00010-of-00030",
7940
+ "00011-of-00030",
7941
+ "00012-of-00030",
7942
+ "00013-of-00030",
7943
+ "00014-of-00030",
7944
+ "00015-of-00030",
7945
+ "00016-of-00030",
7946
+ "00017-of-00030",
7947
+ "00018-of-00030",
7948
+ "00019-of-00030",
7949
+ "00020-of-00030",
7950
+ "00021-of-00030",
7951
+ "00022-of-00030",
7952
+ "00023-of-00030",
7953
+ "00024-of-00030",
7954
+ "00025-of-00030",
7955
+ "00026-of-00030",
7956
+ "00027-of-00030",
7957
+ "00028-of-00030",
7958
+ "00029-of-00030",
7959
+ "00030-of-00030"
7960
+ ]
7961
+ }
7962
+ },
7963
+ {
7964
+ "model_format": "mlx",
7965
+ "model_size_in_billions": 671,
7966
+ "quantizations": [
7967
+ "2bit",
7968
+ "3bit",
7969
+ "4bit"
7970
+ ],
7971
+ "model_id": "mlx-community/DeepSeek-R1-{quantization}"
7972
+ }
7973
+ ],
7974
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
7975
+ "stop_token_ids": [
7976
+ 1
7977
+ ],
7978
+ "stop": [
7979
+ "<|end▁of▁sentence|>"
7980
+ ],
7981
+ "reasoning_start_tag": "<think>",
7982
+ "reasoning_end_tag": "</think>"
7983
+ },
7475
7984
  {
7476
7985
  "version": 1,
7477
7986
  "context_length": 131072,
@@ -8810,7 +9319,8 @@
8810
9319
  "zh"
8811
9320
  ],
8812
9321
  "model_ability": [
8813
- "chat"
9322
+ "chat",
9323
+ "reasoning"
8814
9324
  ],
8815
9325
  "model_description": "deepseek-r1-distill-qwen is distilled from DeepSeek-R1 based on Qwen",
8816
9326
  "model_specs": [
@@ -9014,13 +9524,15 @@
9014
9524
  "model_id": "mlx-community/DeepSeek-R1-Distill-Qwen-32B-{quantization}"
9015
9525
  }
9016
9526
  ],
9017
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
9527
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
9018
9528
  "stop_token_ids": [
9019
9529
  151643
9020
9530
  ],
9021
9531
  "stop": [
9022
9532
  "<|end▁of▁sentence|>"
9023
- ]
9533
+ ],
9534
+ "reasoning_start_tag": "<think>",
9535
+ "reasoning_end_tag": "</think>"
9024
9536
  },
9025
9537
  {
9026
9538
  "version": 1,
@@ -9031,7 +9543,8 @@
9031
9543
  "zh"
9032
9544
  ],
9033
9545
  "model_ability": [
9034
- "chat"
9546
+ "chat",
9547
+ "reasoning"
9035
9548
  ],
9036
9549
  "model_description": "deepseek-r1-distill-llama is distilled from DeepSeek-R1 based on Llama",
9037
9550
  "model_specs": [
@@ -9159,13 +9672,15 @@
9159
9672
  "model_id": "mlx-community/DeepSeek-R1-Distill-Llama-70B-{quantization}"
9160
9673
  }
9161
9674
  ],
9162
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
9675
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
9163
9676
  "stop_token_ids": [
9164
9677
  151643
9165
9678
  ],
9166
9679
  "stop": [
9167
9680
  "<|end▁of▁sentence|>"
9168
- ]
9681
+ ],
9682
+ "reasoning_start_tag": "<think>",
9683
+ "reasoning_end_tag": "</think>"
9169
9684
  },
9170
9685
  {
9171
9686
  "version": 1,
@@ -134,7 +134,7 @@ class LLMFamilyV1(BaseModel):
134
134
  model_name: str
135
135
  model_lang: List[str]
136
136
  model_ability: List[
137
- Literal["embed", "generate", "chat", "tools", "vision", "audio"]
137
+ Literal["embed", "generate", "chat", "tools", "vision", "audio", "reasoning"]
138
138
  ]
139
139
  model_description: Optional[str]
140
140
  # reason for not required str here: legacy registration
@@ -143,6 +143,8 @@ class LLMFamilyV1(BaseModel):
143
143
  chat_template: Optional[str]
144
144
  stop_token_ids: Optional[List[int]]
145
145
  stop: Optional[List[str]]
146
+ reasoning_start_tag: Optional[str]
147
+ reasoning_end_tag: Optional[str]
146
148
 
147
149
 
148
150
  class CustomLLMFamilyV1(LLMFamilyV1):