xinference 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of xinference has been flagged as possibly problematic by the registry.
Files changed (92)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +415 -1
  3. xinference/constants.py +2 -0
  4. xinference/core/model.py +3 -4
  5. xinference/core/supervisor.py +29 -1
  6. xinference/core/worker.py +4 -1
  7. xinference/deploy/cmdline.py +2 -0
  8. xinference/deploy/test/test_cmdline.py +1 -1
  9. xinference/model/audio/core.py +5 -0
  10. xinference/model/audio/cosyvoice.py +0 -1
  11. xinference/model/audio/kokoro.py +1 -1
  12. xinference/model/audio/kokoro_zh.py +124 -0
  13. xinference/model/audio/model_spec.json +64 -20
  14. xinference/model/embedding/flag/core.py +5 -0
  15. xinference/model/embedding/llama_cpp/core.py +22 -19
  16. xinference/model/embedding/sentence_transformers/core.py +19 -4
  17. xinference/model/embedding/vllm/core.py +40 -8
  18. xinference/model/image/cache_manager.py +56 -0
  19. xinference/model/image/core.py +9 -0
  20. xinference/model/image/model_spec.json +116 -9
  21. xinference/model/image/stable_diffusion/core.py +141 -31
  22. xinference/model/llm/core.py +10 -0
  23. xinference/model/llm/llama_cpp/core.py +42 -40
  24. xinference/model/llm/llm_family.json +435 -23
  25. xinference/model/llm/llm_family.py +1 -0
  26. xinference/model/llm/mlx/core.py +52 -33
  27. xinference/model/llm/sglang/core.py +2 -44
  28. xinference/model/llm/tool_parsers/__init__.py +58 -0
  29. xinference/model/llm/tool_parsers/abstract_tool_parser.py +33 -0
  30. xinference/model/llm/tool_parsers/deepseek_r1_tool_parser.py +128 -0
  31. xinference/model/llm/tool_parsers/deepseek_v3_tool_parser.py +145 -0
  32. xinference/model/llm/tool_parsers/glm4_tool_parser.py +123 -0
  33. xinference/model/llm/tool_parsers/llama3_tool_parser.py +77 -0
  34. xinference/model/llm/tool_parsers/qwen_tool_parser.py +320 -0
  35. xinference/model/llm/transformers/core.py +6 -12
  36. xinference/model/llm/utils.py +128 -46
  37. xinference/model/llm/vllm/core.py +8 -61
  38. xinference/model/rerank/core.py +3 -0
  39. xinference/model/rerank/sentence_transformers/core.py +1 -1
  40. xinference/model/rerank/vllm/core.py +56 -6
  41. xinference/model/utils.py +1 -2
  42. xinference/model/video/model_spec.json +95 -1
  43. xinference/thirdparty/cosyvoice/bin/export_jit.py +3 -4
  44. xinference/thirdparty/cosyvoice/bin/export_onnx.py +49 -126
  45. xinference/thirdparty/cosyvoice/bin/{inference.py → inference_deprecated.py} +1 -0
  46. xinference/thirdparty/cosyvoice/bin/train.py +23 -3
  47. xinference/thirdparty/cosyvoice/cli/cosyvoice.py +8 -4
  48. xinference/thirdparty/cosyvoice/cli/frontend.py +4 -4
  49. xinference/thirdparty/cosyvoice/cli/model.py +53 -75
  50. xinference/thirdparty/cosyvoice/dataset/dataset.py +5 -18
  51. xinference/thirdparty/cosyvoice/dataset/processor.py +24 -25
  52. xinference/thirdparty/cosyvoice/flow/decoder.py +24 -433
  53. xinference/thirdparty/cosyvoice/flow/flow.py +6 -14
  54. xinference/thirdparty/cosyvoice/flow/flow_matching.py +33 -145
  55. xinference/thirdparty/cosyvoice/hifigan/generator.py +169 -1
  56. xinference/thirdparty/cosyvoice/llm/llm.py +108 -17
  57. xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +14 -115
  58. xinference/thirdparty/cosyvoice/utils/common.py +20 -0
  59. xinference/thirdparty/cosyvoice/utils/executor.py +8 -4
  60. xinference/thirdparty/cosyvoice/utils/file_utils.py +45 -1
  61. xinference/thirdparty/cosyvoice/utils/losses.py +37 -0
  62. xinference/thirdparty/cosyvoice/utils/mask.py +35 -1
  63. xinference/thirdparty/cosyvoice/utils/train_utils.py +24 -6
  64. xinference/thirdparty/cosyvoice/vllm/cosyvoice2.py +103 -0
  65. xinference/types.py +105 -2
  66. xinference/ui/gradio/chat_interface.py +2 -0
  67. xinference/ui/gradio/media_interface.py +353 -7
  68. xinference/ui/web/ui/build/asset-manifest.json +3 -3
  69. xinference/ui/web/ui/build/index.html +1 -1
  70. xinference/ui/web/ui/build/static/js/main.1086c759.js +3 -0
  71. xinference/ui/web/ui/build/static/js/main.1086c759.js.map +1 -0
  72. xinference/ui/web/ui/node_modules/.cache/babel-loader/3c5758bd12fa334294b1de0ff6b1a4bac8d963c45472eab9dc3e530d82aa6b3f.json +1 -0
  73. xinference/ui/web/ui/node_modules/.cache/babel-loader/a3eb18af328280b139693c9092dff2a0ef8c9a967e6c8956ceee0996611f1984.json +1 -0
  74. xinference/ui/web/ui/node_modules/.cache/babel-loader/d5c224be7081f18cba1678b7874a9782eba895df004874ff8f243f94ba79942a.json +1 -0
  75. xinference/ui/web/ui/node_modules/.cache/babel-loader/f7f18bfb539b036a6a342176dd98a85df5057a884a8da978d679f2a0264883d0.json +1 -0
  76. xinference/ui/web/ui/src/locales/en.json +2 -0
  77. xinference/ui/web/ui/src/locales/ja.json +2 -0
  78. xinference/ui/web/ui/src/locales/ko.json +2 -0
  79. xinference/ui/web/ui/src/locales/zh.json +2 -0
  80. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/METADATA +16 -12
  81. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/RECORD +86 -77
  82. xinference/ui/web/ui/build/static/js/main.4918643a.js +0 -3
  83. xinference/ui/web/ui/build/static/js/main.4918643a.js.map +0 -1
  84. xinference/ui/web/ui/node_modules/.cache/babel-loader/3d2a89f0eccc1f90fc5036c9a1d587c2120e6a6b128aae31d1db7d6bad52722b.json +0 -1
  85. xinference/ui/web/ui/node_modules/.cache/babel-loader/89179f8f51887b9167721860a12412549ff04f78162e921a7b6aa6532646deb2.json +0 -1
  86. xinference/ui/web/ui/node_modules/.cache/babel-loader/8e5cb82c2ff3299c6a44563fe6b1c5515c9750613c51bb63abee0b1d70fc5019.json +0 -1
  87. xinference/ui/web/ui/node_modules/.cache/babel-loader/9dc5cfc67dd0617b0272aeef8651f1589b2155a4ff1fd72ad3166b217089b619.json +0 -1
  88. /xinference/ui/web/ui/build/static/js/{main.4918643a.js.LICENSE.txt → main.1086c759.js.LICENSE.txt} +0 -0
  89. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/WHEEL +0 -0
  90. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/entry_points.txt +0 -0
  91. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/licenses/LICENSE +0 -0
  92. {xinference-1.9.0.dist-info → xinference-1.10.0.dist-info}/top_level.txt +0 -0
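
The most notable structural change in this release is the new xinference/model/llm/tool_parsers package (items 28-34 above): an abstract base class plus concrete parsers for the deepseek_r1, deepseek_v3, glm4, llama3 and qwen output formats, selected per model family via a new "tool_parser" key in llm_family.json (see the hunks below). The sketch that follows is a minimal illustration of such a name-keyed registry; every identifier in it (ToolParser, register_tool_parser, get_tool_parser) is an assumption for illustration, not the package's actual API.

# Hypothetical sketch of a name-keyed tool-parser registry like the one the
# new xinference/model/llm/tool_parsers package appears to introduce. All
# names here are assumptions; the real module layout may differ.
import json
import re
from abc import ABC, abstractmethod

_TOOL_PARSERS: dict[str, type["ToolParser"]] = {}

def register_tool_parser(name: str):
    """Class decorator mapping a llm_family.json "tool_parser" value to a parser."""
    def decorator(cls: type["ToolParser"]) -> type["ToolParser"]:
        _TOOL_PARSERS[name] = cls
        return cls
    return decorator

class ToolParser(ABC):
    @abstractmethod
    def extract_tool_calls(self, text: str) -> list[dict]:
        """Return {"name": ..., "arguments": ...} dicts found in model output."""

@register_tool_parser("qwen")
class QwenToolParser(ToolParser):
    # Qwen-family models emit calls as <tool_call>{...}</tool_call> blocks.
    _PATTERN = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)

    def extract_tool_calls(self, text: str) -> list[dict]:
        calls = []
        for match in self._PATTERN.finditer(text):
            try:
                calls.append(json.loads(match.group(1)))
            except json.JSONDecodeError:
                continue  # tolerate a malformed block instead of failing the reply
        return calls

def get_tool_parser(name: str) -> ToolParser:
    return _TOOL_PARSERS[name]()
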
@@ -1008,7 +1008,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -1070,7 +1071,8 @@
  "<|end_of_text|>",
  "<|eot_id|>",
  "<|eom_id|>"
- ]
+ ],
+ "tool_parser": "llama3"
  },
  {
  "version": 2,
@@ -1133,7 +1135,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -1946,7 +1949,8 @@
  "<|im_end|>"
  ],
  "reasoning_start_tag": "<think>",
- "reasoning_end_tag": "</think>"
+ "reasoning_end_tag": "</think>",
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -2209,7 +2213,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -4767,6 +4772,7 @@
  {
  "model_format": "pytorch",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -4846,6 +4852,7 @@
  {
  "model_format": "pytorch",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -4866,6 +4873,7 @@
  {
  "model_format": "awq",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -4885,6 +4893,7 @@
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -5215,6 +5224,7 @@
  {
  "model_format": "mlx",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -5263,6 +5273,7 @@
  {
  "model_format": "pytorch",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -5281,6 +5292,7 @@
  {
  "model_format": "gptq",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -5311,6 +5323,116 @@
  "reasoning_start_tag": "<think>",
  "reasoning_end_tag": "</think>"
  },
+ {
+ "version": 2,
+ "context_length": 131072,
+ "model_name": "Deepseek-V3.1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning",
+ "hybrid",
+ "tools"
+ ],
+ "model_description": "DeepSeek-V3.1 is a hybrid model that supports both thinking mode and non-thinking mode.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3.1"
+ },
+ "modelscope": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "deepseek-ai/DeepSeek-V3.1"
+ }
+ }
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "cpatonn/DeepSeek-V3.1-GPTQ-4bit"
+ }
+ }
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "QuantTrio/DeepSeek-V3.1-AWQ"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "tclf90/DeepSeek-V3.1-AWQ"
+ }
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "8bit",
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
+ },
+ "modelscope": {
+ "quantizations": [
+ "8bit",
+ "4bit"
+ ],
+ "model_id": "mlx-community/DeepSeek-V3.1-{quantization}"
+ }
+ }
+ }
+ ],
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not thinking is defined %}{% set thinking = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- if ns.is_last_user %}{{'<|Assistant|></think>'}}{%- endif %}{%- set ns.is_last_user = false -%}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments'] + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}{%- if ns.is_last_user %}{{'<|Assistant|>'}}{%- if message['prefix'] is defined and message['prefix'] and thinking %}{{'<think>'}} {%- else %}{{'</think>'}}{%- endif %}{%- endif %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{%- set content = message['content'] -%}{%- if '</think>' in content %}{%- set content = content.split('</think>', 1)[1] -%}{%- endif %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endfor -%}{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{%- if not thinking %}{{'</think>'}}{%- else %}{{'<think>'}}{%- endif %}{% endif %}",
+ "stop_token_ids": [
+ 1
+ ],
+ "stop": [
+ "<|end▁of▁sentence|>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>",
+ "virtualenv": {
+ "packages": [
+ "transformers==4.53.0"
+ ]
+ }
+ },
  {
  "version": 2,
  "context_length": 131072,
@@ -5655,7 +5777,8 @@
  "<|end▁of▁sentence|>"
  ],
  "reasoning_start_tag": "<think>",
- "reasoning_end_tag": "</think>"
+ "reasoning_end_tag": "</think>",
+ "tool_parser": "deepseek_r1"
  },
  {
  "version": 2,
@@ -6242,6 +6365,7 @@
  {
  "model_format": "pytorch",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6262,6 +6386,7 @@
  {
  "model_format": "awq",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6281,6 +6406,7 @@
  {
  "model_format": "ggufv2",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6475,6 +6601,7 @@
  {
  "model_format": "mlx",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6499,7 +6626,8 @@
  ],
  "stop": [
  "<|end▁of▁sentence|>"
- ]
+ ],
+ "tool_parser": "deepseek_v3"
  },
  {
  "version": 2,
@@ -6517,6 +6645,7 @@
  {
  "model_format": "pytorch",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6535,6 +6664,7 @@
  {
  "model_format": "awq",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -6553,6 +6683,7 @@
  {
  "model_format": "mlx",
  "model_size_in_billions": 671,
+ "activated_size_in_billions": 37,
  "model_src": {
  "huggingface": {
  "quantizations": [
@@ -7687,7 +7818,7 @@
  "packages": [
  "transformers>=4.51.3",
  "mlx-lm>=0.23.1 ; sys_platform=='darwin'",
- "numpy==1.26.4"
+ "#system_numpy#"
  ]
  }
  },
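
The hunk above (and two similar ones further down) swaps the hard pin numpy==1.26.4 for a #system_numpy# placeholder in model virtualenv specs. A plausible reading is that the installer expands the placeholder to match the numpy already present on the host, avoiding ABI mismatches between the server process and per-model virtualenvs; how xinference actually resolves it is not shown in this diff. A hypothetical resolver for that reading:

# Illustrative guess at expanding "#system_numpy#" when building a model's
# virtualenv; the helper name and behavior are assumptions.
from importlib.metadata import PackageNotFoundError, version

def resolve_placeholder(requirement: str) -> str:
    if requirement == "#system_numpy#":
        try:
            return f"numpy=={version('numpy')}"
        except PackageNotFoundError:
            return "numpy"  # no host numpy: fall back to an unpinned install
    return requirement

print(resolve_placeholder("#system_numpy#"))  # e.g. numpy==1.26.4
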
@@ -7796,7 +7927,8 @@
  "<|endoftext|>",
  "<|user|>",
  "<|observation|>"
- ]
+ ],
+ "tool_parser":"glm4"
  },
  {
  "version": 2,
@@ -7903,7 +8035,8 @@
  "<|endoftext|>",
  "<|user|>",
  "<|observation|>"
- ]
+ ],
+ "tool_parser":"glm4"
  },
  {
  "version": 2,
@@ -9065,7 +9198,8 @@
  "<|end_of_text|>",
  "<|eot_id|>",
  "<|eom_id|>"
- ]
+ ],
+ "tool_parser": "llama3"
  },
  {
  "version": 2,
@@ -11794,7 +11928,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -11857,7 +11992,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -12581,7 +12717,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -12702,7 +12839,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -13884,7 +14022,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -15394,7 +15533,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -15521,7 +15661,7 @@
  "virtualenv": {
  "packages": [
  "git+https://github.com/huggingface/transformers@v4.51.3-Qwen2.5-Omni-preview",
- "numpy==1.26.4",
+ "#system_numpy#",
  "qwen_omni_utils",
  "soundfile"
  ]
@@ -17302,9 +17442,10 @@
  "packages": [
  "transformers>=4.51.0",
  "mlx-lm>=0.24.0 ; sys_platform=='darwin'",
- "numpy==1.26.4"
+ "#system_numpy#"
  ]
- }
+ },
+ "tool_parser": "qwen"
  },
  {
  "version": 2,
@@ -17919,7 +18060,8 @@
  "<|endoftext|>",
  "<|im_start|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -18531,7 +18673,8 @@
  "<|im_end|>"
  ],
  "reasoning_start_tag": "<think>",
- "reasoning_end_tag": "</think>"
+ "reasoning_end_tag": "</think>",
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -19314,7 +19457,8 @@
  "stop": [
  "<|endoftext|>",
  "<|im_end|>"
- ]
+ ],
+ "tool_parser":"qwen"
  },
  {
  "version": 2,
@@ -21137,5 +21281,273 @@
  "#system_numpy#"
  ]
  }
+ },
+ {
+ "version": 2,
+ "context_length": 131072,
+ "model_name": "KAT-V1",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat"
+ ],
+ "model_description": "Kwaipilot-AutoThink ranks first among all open-source models on LiveCodeBench Pro, a challenging benchmark explicitly designed to prevent data leakage, and even surpasses strong proprietary systems such as Seed and o3-mini.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 40,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "Kwaipilot/KAT-V1-40B"
+ },
+ "modelscope": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "Kwaipilot/KAT-V1-40B"
+ }
+ }
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 40,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4-Int8Mix"
+ ],
+ "model_id": "QuantTrio/KAT-V1-40B-GPTQ-Int4-Int8Mix"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4-Int8Mix"
+ ],
+ "model_id": "tclf90/KAT-V1-40B-GPTQ-Int4-Int8Mix"
+ }
+ }
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 40,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "QuantTrio/KAT-V1-40B-AWQ"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "tclf90/KAT-V1-40B-AWQ"
+ }
+ }
+ }
+ ],
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set answer_blocks = message.content.split('<answer>\\n') %}\n {%- if answer_blocks|length > 1 %}\n {%- set last_answer_block = answer_blocks[-1] %}\n {%- if '\\n</answer>' in last_answer_block %}\n {%- set content = last_answer_block.split('\\n</answer>')[0] %}\n {%- else %}\n {%- set content = message.content.split('<think_off>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- else %}\n {%- set content = message.content.split('<think_off>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set answer_blocks = message.content.split('<answer>\\n') %}\n {%- if answer_blocks|length > 1 %}\n {%- set last_answer_block = answer_blocks[-1] %}\n {%- if '\\n</answer>' in last_answer_block %}\n {%- set content = last_answer_block.split('\\n</answer>')[0] %}\n {%- else %}\n {%- set content = message.content.split('<think_off>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- else %}\n {%- set content = message.content.split('<think_off>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\\\"name\\\": \\\"' }}\n {{- tool_call.name }}\n {{- '\\\", \\\"arguments\\\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<judge>\\n' }}\n{%- endif %}",
+ "stop_token_ids": [
+ 151643,
+ 151645
+ ],
+ "stop": [
+ "<|endoftext|>",
+ "<|im_end|>"
+ ]
+ },
+ {
+ "version": 2,
+ "context_length": 524288,
+ "model_name": "seed-oss",
+ "model_lang": [
+ "en",
+ "zh"
+ ],
+ "model_ability": [
+ "chat",
+ "reasoning",
+ "tools"
+ ],
+ "model_description": "Seed-OSS is a series of open-source large language models developed by ByteDance's Seed Team, designed for powerful long-context, reasoning, agent and general capabilities, and versatile developer-friendly features. Although trained with only 12T tokens, Seed-OSS achieves excellent performance on several popular open benchmarks.",
+ "model_specs": [
+ {
+ "model_format": "pytorch",
+ "model_size_in_billions": 36,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "ByteDance-Seed/Seed-OSS-36B-Instruct"
+ },
+ "modelscope": {
+ "quantizations": [
+ "none"
+ ],
+ "model_id": "ByteDance-Seed/Seed-OSS-36B-Instruct"
+ }
+ }
+ },
+ {
+ "model_format": "gptq",
+ "model_size_in_billions": 36,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int8",
+ "Int4",
+ "Int3"
+ ],
+ "model_id": "QuantTrio/Seed-OSS-36B-Instruct-GPTQ-{quantization}"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int8",
+ "Int4",
+ "Int3"
+ ],
+ "model_id": "tclf90/Seed-OSS-36B-Instruct-GPTQ-{quantization}"
+ }
+ }
+ },
+ {
+ "model_format": "awq",
+ "model_size_in_billions": 36,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "QuantTrio/Seed-OSS-36B-Instruct-AWQ"
+ },
+ "modelscope": {
+ "quantizations": [
+ "Int4"
+ ],
+ "model_id": "tclf90/Seed-OSS-36B-Instruct-AWQ"
+ }
+ }
+ },
+ {
+ "model_format": "mlx",
+ "model_size_in_billions": 36,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/Seed-OSS-36B-Instruct-4bit"
+ },
+ "modelscope": {
+ "quantizations": [
+ "4bit"
+ ],
+ "model_id": "mlx-community/Seed-OSS-36B-Instruct-4bit"
+ }
+ }
+ },
+ {
+ "model_format": "ggufv2",
+ "model_size_in_billions": 36,
+ "model_src": {
+ "huggingface": {
+ "quantizations": [
+ "BF16",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0",
+ "UD-IQ1_M",
+ "UD-IQ1_S",
+ "UD-IQ2_M",
+ "UD-IQ2_XXS",
+ "UD-IQ3_XXS",
+ "UD-Q2_K_XL",
+ "UD-Q3_K_XL",
+ "UD-Q4_K_XL",
+ "UD-Q5_K_XL",
+ "UD-Q6_K_XL",
+ "UD-Q8_K_XL"
+ ],
+ "quantization_parts": {
+ "BF16": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ]
+ },
+ "model_id": "unsloth/Seed-OSS-36B-Instruct-GGUF",
+ "model_file_name_template": "Seed-OSS-36B-Instruct-{quantization}.gguf",
+ "model_file_name_split_template": "{quantization}/Seed-OSS-36B-Instruct-{quantization}-{part}.gguf"
+ },
+ "modelscope": {
+ "quantizations": [
+ "BF16",
+ "IQ4_NL",
+ "IQ4_XS",
+ "Q2_K",
+ "Q2_K_L",
+ "Q3_K_M",
+ "Q3_K_S",
+ "Q4_0",
+ "Q4_1",
+ "Q4_K_M",
+ "Q4_K_S",
+ "Q5_K_M",
+ "Q5_K_S",
+ "Q6_K",
+ "Q8_0",
+ "UD-IQ1_M",
+ "UD-IQ1_S",
+ "UD-IQ2_M",
+ "UD-IQ2_XXS",
+ "UD-IQ3_XXS",
+ "UD-Q2_K_XL",
+ "UD-Q3_K_XL",
+ "UD-Q4_K_XL",
+ "UD-Q5_K_XL",
+ "UD-Q6_K_XL",
+ "UD-Q8_K_XL"
+ ],
+ "quantization_parts": {
+ "BF16": [
+ "00001-of-00002",
+ "00002-of-00002"
+ ]
+ },
+ "model_id": "unsloth/Seed-OSS-36B-Instruct-GGUF",
+ "model_file_name_template": "Seed-OSS-36B-Instruct-{quantization}.gguf",
+ "model_file_name_split_template": "{quantization}/Seed-OSS-36B-Instruct-{quantization}-{part}.gguf"
+ }
+ }
+ }
+ ],
+ "chat_template": "{# ------------- special token variables ------------- #}{%- set bos_token = '<seed:bos>' -%}{%- set eos_token = '<seed:eos>' -%}{%- set pad_token = '<seed:pad>' -%}{%- set toolcall_begin_token = '<seed:tool_call>' -%}{%- set toolcall_end_token = '</seed:tool_call>' -%}{%- set think_begin_token = '<seed:think>' -%}{%- set think_end_token = '</seed:think>' -%}{%- set budget_begin_token = '<seed:cot_budget_reflect>'-%}{%- set budget_end_token = '</seed:cot_budget_reflect>'-%}{# -------------- reflection-interval lookup -------------- #}{%- if not thinking_budget is defined %}{%- set thinking_budget = -1 -%}{%- endif -%}{%- set budget_reflections_v05 = { 0: 0, 512: 128, 1024: 256, 2048: 512, 4096: 512, 8192: 1024, 16384: 1024} -%}{%- set ns = namespace(interval = None) -%}{%- for k, v in budget_reflections_v05 | dictsort -%} {%- if ns.interval is none and thinking_budget <= k -%} {%- set ns.interval = v -%} {%- endif -%}{%- endfor -%}{%- if ns.interval is none -%} {%- set ns.interval = budget_reflections_v05[16384] -%}{%- endif -%}{%- if messages[0][\"role\"] == \"system\" %}{%- set system_message = messages[0][\"content\"] %}{%- set loop_messages = messages[1:] %}{%- else %}{%- set loop_messages = messages %}{%- endif %}{%- if not tools is defined or tools is none %}{%- set tools = [] %}{%- endif %}{%- macro py_type(t) -%} {%- if t == \"string\" -%}str {%- elif t in (\"number\", \"integer\") -%}int {%- elif t == \"boolean\" -%}bool {%- elif t == \"array\" -%}list {%- else -%}Any{%- endif -%}{%- endmacro -%}{%- if system_message is defined %}{{ bos_token + \"system\\n\" + system_message }}{%- else %}{%- if tools is iterable and tools | length > 0 %}{{ bos_token + \"system\\nYou are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query.\" }}{%- endif %}{%- endif %}{%- if use_json_tooldef is defined and use_json_tooldef %}{{\"Tool List:\\nYou are authorized to use the following tools (described in JSON Schema format). Before performing any task, you must decide how to call them based on the descriptions and parameters of these tools.\"}}{{ tools | tojson(ensure_ascii=False) }}{%- else %}{%- for item in tools if item.type == \"function\" %}Function:def {{ item.function.name }}({%- for name, spec in item.function.parameters.properties.items() %} {{- name }}: {{ py_type(spec.type) }}{% if not loop.last %},{% endif %}{%- endfor %}): \"\"\" {{ item.function.description | trim }} {%- if item.function.parameters.properties %} Args: {%- for name, spec in item.function.parameters.properties.items() %} - {{ name }} ({{ py_type(spec.type) }}) {%- if name in item.function.parameters.required %} [必填]{% else %} [选填]{% endif %}: {{- \" \" ~ (spec.description or \"\") }} {%- endfor %} {%- endif %} {%- if item.function.returns is defined and item.function.returns.properties is defined and item.function.returns.properties %} Returns: {%- for name, spec in item.function.returns.properties.items() %} - {{ name }} ({{ py_type(spec.type) }}): {{- \" \" ~ (spec.description or \"\") }} {%- endfor %} {%- endif %} \"\"\"{%- endfor %}{%- endif %}{%- if tools is iterable and tools | length > 0 %}{{\"工具调用请遵循如下格式:\\n<seed:tool_call>\\n<function=example_function_name>\\n<parameter=example_parameter_1>value_1</parameter>\\n<parameter=example_parameter_2>This is the value for the second parameter\\nthat can span\\nmultiple lines</parameter>\\n</function>\\n</seed:tool_call>\\n\"}}{%- endif %}{%- if system_message is defined or tools is iterable and tools | length > 0 %}{{ eos_token }}{%- endif %}{%- if thinking_budget is defined %}{%- if thinking_budget == 0 %}{{ bos_token+\"system\" }}{{ \"You are an intelligent assistant that can answer questions in one step without the need for reasoning and thinking, that is, your thinking budget is 0. Next, please skip the thinking process and directly start answering the user's questions.\" }}{{ eos_token }}{%- elif not thinking_budget == -1 %}{{ bos_token+\"system\" }}{{ \"You are an intelligent assistant with reflective ability. In the process of thinking and reasoning, you need to strictly follow the thinking budget, which is \"}}{{thinking_budget}}{{\". That is, you need to complete your thinking within \"}}{{thinking_budget}}{{\" tokens and start answering the user's questions. You will reflect on your thinking process every \"}}{{ns.interval}}{{\" tokens, stating how many tokens have been used and how many are left.\"}}{{ eos_token }}{%- endif %}{%- endif %}{%- for message in loop_messages %}{%- if message.role == \"assistant\" and message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}{{ bos_token + message.role }}{%- if message.reasoning_content is defined and message.reasoning_content is string and message.reasoning_content | trim | length > 0 %}{{ \"\\n\" + think_begin_token + message.reasoning_content | trim + think_end_token }}{%- endif %}{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}{{ \"\\n\" + message.content | trim + \"\\n\" }}{%- endif %}{%- for tool_call in message.tool_calls %}{%- if tool_call.function is defined %}{% set tool_call = tool_call.function %}{% endif %}{{ \"\\n\" + toolcall_begin_token + \"\\n<function=\" + tool_call.name + \">\\n\" }}{%- if tool_call.arguments is defined %}{%- for arg_name, arg_value in tool_call.arguments | items %}{{ \"<parameter=\" + arg_name + \">\" }}{%- set arg_value = arg_value if arg_value is string else arg_value | string %}{{ arg_value+\"</parameter>\\n\" }}{%- endfor %}{%- endif %}{{ \"</function>\\n\" + toolcall_end_token }}{%- endfor %}{{ eos_token }}{%- elif message.role in [\"user\", \"system\"] %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- elif message.role == \"assistant\" %}{{ bos_token + message.role }}{%- if message.reasoning_content is defined and message.reasoning_content is string and message.reasoning_content | trim | length > 0 %}{{ \"\\n\" + think_begin_token + message.reasoning_content | trim + think_end_token }}{%- endif %}{%- if message.content is defined and message.content is string and message.content | trim | length > 0 %}{{ \"\\n\" + message.content | trim + eos_token }}{%- endif %}{%- else %}{{ bos_token + message.role + \"\\n\" + message.content + eos_token }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{ bos_token+\"assistant\\n\" }}{%- if thinking_budget == 0 %}{{ think_begin_token + \"\\n\" + budget_begin_token + \"The current thinking budget is 0, so I will directly start answering the question.\" + budget_end_token + \"\\n\" + think_end_token }}{%- endif %}{%- endif %}",
+ "stop_token_ids": [
+ 0,
+ 1,
+ 2
+ ],
+ "stop": [
+ "<seed:bos>",
+ "<seed:pad>",
+ "<seed:eos>"
+ ],
+ "reasoning_start_tag": "<think>",
+ "reasoning_end_tag": "</think>"
  }
  ]
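
With the tool_parser entries in place (and restful_api.py growing by roughly 415 lines in this release), tool calls from these families surface through the server's OpenAI-compatible endpoint. A closing sketch, assuming a qwen-family model tagged "tool_parser": "qwen" has already been launched under the uid my-model; the URL and uid are placeholders:

# Exercising tool calling against xinference's OpenAI-compatible endpoint.
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:9997/v1", api_key="not-needed")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Look up current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)
print(resp.choices[0].message.tool_calls)
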