xinference 0.15.2__py3-none-any.whl → 0.15.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (57)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +29 -2
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +4 -0
  5. xinference/core/image_interface.py +76 -23
  6. xinference/core/model.py +80 -39
  7. xinference/core/progress_tracker.py +187 -0
  8. xinference/core/supervisor.py +11 -0
  9. xinference/core/worker.py +1 -0
  10. xinference/model/audio/chattts.py +2 -1
  11. xinference/model/audio/core.py +0 -2
  12. xinference/model/audio/model_spec.json +8 -0
  13. xinference/model/audio/model_spec_modelscope.json +9 -0
  14. xinference/model/embedding/core.py +14 -5
  15. xinference/model/embedding/model_spec.json +7 -0
  16. xinference/model/embedding/model_spec_modelscope.json +9 -1
  17. xinference/model/image/core.py +6 -7
  18. xinference/model/image/sdapi.py +35 -4
  19. xinference/model/image/stable_diffusion/core.py +212 -70
  20. xinference/model/llm/llm_family.json +28 -40
  21. xinference/model/llm/llm_family_modelscope.json +18 -22
  22. xinference/model/llm/transformers/cogvlm2.py +2 -1
  23. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  24. xinference/model/llm/transformers/core.py +6 -2
  25. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  26. xinference/model/llm/transformers/glm4v.py +2 -1
  27. xinference/model/llm/transformers/intern_vl.py +2 -0
  28. xinference/model/llm/transformers/minicpmv25.py +2 -0
  29. xinference/model/llm/transformers/minicpmv26.py +2 -0
  30. xinference/model/llm/transformers/omnilmm.py +2 -0
  31. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  32. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  33. xinference/model/llm/transformers/qwen_vl.py +2 -1
  34. xinference/model/llm/transformers/utils.py +35 -2
  35. xinference/model/llm/transformers/yi_vl.py +2 -0
  36. xinference/model/llm/utils.py +72 -17
  37. xinference/model/llm/vllm/core.py +69 -9
  38. xinference/model/llm/vllm/utils.py +41 -0
  39. xinference/model/rerank/core.py +19 -0
  40. xinference/model/rerank/model_spec.json +8 -0
  41. xinference/model/rerank/model_spec_modelscope.json +8 -0
  42. xinference/model/utils.py +7 -29
  43. xinference/model/video/core.py +0 -2
  44. xinference/web/ui/build/asset-manifest.json +3 -3
  45. xinference/web/ui/build/index.html +1 -1
  46. xinference/web/ui/build/static/js/{main.29578905.js → main.e51a356d.js} +3 -3
  47. xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
  48. xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
  49. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/METADATA +6 -5
  50. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/RECORD +55 -53
  51. xinference/web/ui/build/static/js/main.29578905.js.map +0 -1
  52. xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
  53. /xinference/web/ui/build/static/js/{main.29578905.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
  54. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
  55. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
  56. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
  57. {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
@@ -1111,7 +1111,8 @@
1111
1111
  "th"
1112
1112
  ],
1113
1113
  "model_ability": [
1114
- "chat"
1114
+ "chat",
1115
+ "tools"
1115
1116
  ],
1116
1117
  "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
1117
1118
  "model_specs": [
@@ -1299,14 +1300,16 @@
1299
1300
  "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
1300
1301
  }
1301
1302
  ],
1302
- "chat_template": "{{- '<|begin_of_text|>' }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
1303
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
1303
1304
  "stop_token_ids": [
1304
1305
  128001,
1306
+ 128008,
1305
1307
  128009
1306
1308
  ],
1307
1309
  "stop": [
1308
1310
  "<|end_of_text|>",
1309
- "<|eot_id|>"
1311
+ "<|eot_id|>",
1312
+ "<|eom_id|>"
1310
1313
  ]
1311
1314
  },
1312
1315
  {
@@ -6483,8 +6486,7 @@
6483
6486
  "8-bit",
6484
6487
  "none"
6485
6488
  ],
6486
- "model_id": "OpenGVLab/InternVL2-1B",
6487
- "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
6489
+ "model_id": "OpenGVLab/InternVL2-1B"
6488
6490
  },
6489
6491
  {
6490
6492
  "model_format": "pytorch",
@@ -6494,8 +6496,7 @@
6494
6496
  "8-bit",
6495
6497
  "none"
6496
6498
  ],
6497
- "model_id": "OpenGVLab/InternVL2-2B",
6498
- "model_revision": "422ad7c6335917bfb514958233955512338485a6"
6499
+ "model_id": "OpenGVLab/InternVL2-2B"
6499
6500
  },
6500
6501
  {
6501
6502
  "model_format": "awq",
@@ -6503,8 +6504,7 @@
6503
6504
  "quantizations": [
6504
6505
  "Int4"
6505
6506
  ],
6506
- "model_id": "OpenGVLab/InternVL2-2B-AWQ",
6507
- "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
6507
+ "model_id": "OpenGVLab/InternVL2-2B-AWQ"
6508
6508
  },
6509
6509
  {
6510
6510
  "model_format": "pytorch",
@@ -6514,8 +6514,7 @@
6514
6514
  "8-bit",
6515
6515
  "none"
6516
6516
  ],
6517
- "model_id": "OpenGVLab/InternVL2-4B",
6518
- "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
6517
+ "model_id": "OpenGVLab/InternVL2-4B"
6519
6518
  },
6520
6519
  {
6521
6520
  "model_format": "pytorch",
@@ -6525,8 +6524,7 @@
6525
6524
  "8-bit",
6526
6525
  "none"
6527
6526
  ],
6528
- "model_id": "OpenGVLab/InternVL2-8B",
6529
- "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
6527
+ "model_id": "OpenGVLab/InternVL2-8B"
6530
6528
  },
6531
6529
  {
6532
6530
  "model_format": "awq",
@@ -6534,8 +6532,7 @@
6534
6532
  "quantizations": [
6535
6533
  "Int4"
6536
6534
  ],
6537
- "model_id": "OpenGVLab/InternVL2-8B-AWQ",
6538
- "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
6535
+ "model_id": "OpenGVLab/InternVL2-8B-AWQ"
6539
6536
  },
6540
6537
  {
6541
6538
  "model_format": "pytorch",
@@ -6545,8 +6542,7 @@
6545
6542
  "8-bit",
6546
6543
  "none"
6547
6544
  ],
6548
- "model_id": "OpenGVLab/InternVL2-26B",
6549
- "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
6545
+ "model_id": "OpenGVLab/InternVL2-26B"
6550
6546
  },
6551
6547
  {
6552
6548
  "model_format": "awq",
@@ -6554,8 +6550,7 @@
6554
6550
  "quantizations": [
6555
6551
  "Int4"
6556
6552
  ],
6557
- "model_id": "OpenGVLab/InternVL2-26B-AWQ",
6558
- "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
6553
+ "model_id": "OpenGVLab/InternVL2-26B-AWQ"
6559
6554
  },
6560
6555
  {
6561
6556
  "model_format": "pytorch",
@@ -6565,8 +6560,7 @@
6565
6560
  "8-bit",
6566
6561
  "none"
6567
6562
  ],
6568
- "model_id": "OpenGVLab/InternVL2-40B",
6569
- "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
6563
+ "model_id": "OpenGVLab/InternVL2-40B"
6570
6564
  },
6571
6565
  {
6572
6566
  "model_format": "awq",
@@ -6574,8 +6568,7 @@
6574
6568
  "quantizations": [
6575
6569
  "Int4"
6576
6570
  ],
6577
- "model_id": "OpenGVLab/InternVL2-40B-AWQ",
6578
- "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
6571
+ "model_id": "OpenGVLab/InternVL2-40B-AWQ"
6579
6572
  },
6580
6573
  {
6581
6574
  "model_format": "pytorch",
@@ -6585,8 +6578,7 @@
6585
6578
  "8-bit",
6586
6579
  "none"
6587
6580
  ],
6588
- "model_id": "OpenGVLab/InternVL2-Llama3-76B",
6589
- "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
6581
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B"
6590
6582
  },
6591
6583
  {
6592
6584
  "model_format": "awq",
@@ -6594,8 +6586,7 @@
6594
6586
  "quantizations": [
6595
6587
  "Int4"
6596
6588
  ],
6597
- "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
6598
- "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
6589
+ "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ"
6599
6590
  }
6600
6591
  ],
6601
6592
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -6918,18 +6909,15 @@
6918
6909
  "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
6919
6910
  }
6920
6911
  ],
6921
- "prompt_style":{
6922
- "style_name":"QWEN",
6923
- "system_prompt":"You are a helpful assistant",
6924
- "roles":[
6925
- "user",
6926
- "assistant"
6927
- ],
6928
- "stop": [
6929
- "<|im_end|>",
6930
- "<|endoftext|>"
6931
- ]
6932
- }
6912
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
6913
+ "stop_token_ids": [
6914
+ 151645,
6915
+ 151643
6916
+ ],
6917
+ "stop": [
6918
+ "<|im_end|>",
6919
+ "<|endoftext|>"
6920
+ ]
6933
6921
  },
6934
6922
  {
6935
6923
  "version": 1,
@@ -7937,7 +7925,7 @@
7937
7925
  }
7938
7926
  }
7939
7927
  ],
7940
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7928
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
7941
7929
  "stop_token_ids": [
7942
7930
  151643,
7943
7931
  151644,
@@ -246,7 +246,8 @@
246
246
  "th"
247
247
  ],
248
248
  "model_ability": [
249
- "chat"
249
+ "chat",
250
+ "tools"
250
251
  ],
251
252
  "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
252
253
  "model_specs": [
@@ -350,14 +351,16 @@
350
351
  "model_hub": "modelscope"
351
352
  }
352
353
  ],
353
- "chat_template": "{{- '<|begin_of_text|>' }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
354
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
354
355
  "stop_token_ids": [
355
356
  128001,
357
+ 128008,
356
358
  128009
357
359
  ],
358
360
  "stop": [
359
361
  "<|end_of_text|>",
360
- "<|eot_id|>"
362
+ "<|eot_id|>",
363
+ "<|eom_id|>"
361
364
  ]
362
365
  },
363
366
  {
@@ -4334,16 +4337,8 @@
4334
4337
  }
4335
4338
  ],
4336
4339
  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
4337
- "stop_token_ids": [
4338
- 151643,
4339
- 151644,
4340
- 151645
4341
- ],
4342
- "stop": [
4343
- "<|endoftext|>",
4344
- "<|im_start|>",
4345
- "<|im_end|>"
4346
- ]
4340
+ "stop_token_ids": [],
4341
+ "stop": []
4347
4342
  },
4348
4343
  {
4349
4344
  "version": 1,
@@ -4632,14 +4627,15 @@
4632
4627
  "model_hub": "modelscope"
4633
4628
  }
4634
4629
  ],
4635
- "prompt_style": {
4636
- "style_name": "QWEN",
4637
- "system_prompt": "You are a helpful assistant",
4638
- "roles": [
4639
- "user",
4640
- "assistant"
4641
- ]
4642
- }
4630
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
4631
+ "stop_token_ids": [
4632
+ 151645,
4633
+ 151643
4634
+ ],
4635
+ "stop": [
4636
+ "<|im_end|>",
4637
+ "<|endoftext|>"
4638
+ ]
4643
4639
  },
4644
4640
  {
4645
4641
  "version": 1,
@@ -5687,7 +5683,7 @@
5687
5683
  }
5688
5684
  }
5689
5685
  ],
5690
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5686
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
5691
5687
  "stop_token_ids": [
5692
5688
  151643,
5693
5689
  151644,
@@ -29,7 +29,7 @@ from ..utils import (
29
29
  parse_messages,
30
30
  )
31
31
  from .core import PytorchChatModel, PytorchGenerateConfig
32
- from .utils import get_max_src_len
32
+ from .utils import cache_clean, get_max_src_len
33
33
 
34
34
  logger = logging.getLogger(__name__)
35
35
 
@@ -176,6 +176,7 @@ class CogVLM2Model(PytorchChatModel):
176
176
  query = content
177
177
  return query, image, history
178
178
 
179
+ @cache_clean
179
180
  def chat(
180
181
  self,
181
182
  messages: List[Dict],
@@ -28,6 +28,7 @@ from ..utils import (
28
28
  parse_messages,
29
29
  )
30
30
  from .core import PytorchChatModel, PytorchGenerateConfig
31
+ from .utils import cache_clean
31
32
 
32
33
  logger = logging.getLogger(__name__)
33
34
 
@@ -227,6 +228,7 @@ class CogVLM2VideoModel(PytorchChatModel):
227
228
 
228
229
  return query, image, video, history
229
230
 
231
+ @cache_clean
230
232
  def chat(
231
233
  self,
232
234
  messages: List[Dict],
@@ -40,7 +40,7 @@ from ....types import (
40
40
  from ...utils import select_device
41
41
  from ..core import LLM
42
42
  from ..llm_family import LLMFamilyV1, LLMSpecV1
43
- from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
43
+ from ..utils import LLAMA3_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
44
44
  from .utils import get_context_length, get_max_src_len, pad_prefill_tokens
45
45
 
46
46
  logger = logging.getLogger(__name__)
@@ -733,7 +733,11 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
733
733
  tools = generate_config.pop("tools", []) if generate_config else None
734
734
  model_family = self.model_family.model_family or self.model_family.model_name
735
735
  full_context_kwargs = {}
736
- if tools and model_family in QWEN_TOOL_CALL_FAMILY:
736
+ if (
737
+ tools
738
+ and model_family in QWEN_TOOL_CALL_FAMILY
739
+ or model_family in LLAMA3_TOOL_CALL_FAMILY
740
+ ):
737
741
  full_context_kwargs["tools"] = tools
738
742
  assert self.model_family.chat_template is not None
739
743
  full_prompt = self.get_full_context(
@@ -28,6 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
28
28
  from ..llm_family import LLMFamilyV1, LLMSpecV1
29
29
  from ..utils import generate_chat_completion, generate_completion_chunk
30
30
  from .core import PytorchChatModel, PytorchGenerateConfig
31
+ from .utils import cache_clean
31
32
 
32
33
  logger = logging.getLogger(__name__)
33
34
 
@@ -137,6 +138,7 @@ class DeepSeekVLChatModel(PytorchChatModel):
137
138
  return "".join(new_content), images
138
139
  return content, []
139
140
 
141
+ @cache_clean
140
142
  def chat(
141
143
  self,
142
144
  messages: List[Dict],
@@ -26,7 +26,7 @@ from ...utils import select_device
26
26
  from ..llm_family import LLMFamilyV1, LLMSpecV1
27
27
  from ..utils import _decode_image, generate_chat_completion, generate_completion_chunk
28
28
  from .core import PytorchChatModel, PytorchGenerateConfig
29
- from .utils import get_max_src_len
29
+ from .utils import cache_clean, get_max_src_len
30
30
 
31
31
  logger = logging.getLogger(__name__)
32
32
 
@@ -129,6 +129,7 @@ class Glm4VModel(PytorchChatModel):
129
129
  res.append({"role": role, "content": text})
130
130
  return res
131
131
 
132
+ @cache_clean
132
133
  def chat(
133
134
  self,
134
135
  messages: List[Dict],
@@ -27,6 +27,7 @@ from ..utils import (
27
27
  parse_messages,
28
28
  )
29
29
  from .core import PytorchChatModel, PytorchGenerateConfig
30
+ from .utils import cache_clean
30
31
 
31
32
  logger = logging.getLogger(__name__)
32
33
 
@@ -326,6 +327,7 @@ class InternVLChatModel(PytorchChatModel):
326
327
  use_fast=False,
327
328
  )
328
329
 
330
+ @cache_clean
329
331
  def chat(
330
332
  self,
331
333
  messages: List[Dict],
@@ -29,6 +29,7 @@ from ..utils import (
29
29
  parse_messages,
30
30
  )
31
31
  from .core import PytorchChatModel, PytorchGenerateConfig
32
+ from .utils import cache_clean
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
 
@@ -119,6 +120,7 @@ class MiniCPMV25Model(PytorchChatModel):
119
120
  raise RuntimeError("Only one image per message is supported")
120
121
  return content, []
121
122
 
123
+ @cache_clean
122
124
  def chat(
123
125
  self,
124
126
  messages: List[Dict],
@@ -30,6 +30,7 @@ from ..utils import (
30
30
  parse_messages,
31
31
  )
32
32
  from .core import PytorchChatModel, PytorchGenerateConfig
33
+ from .utils import cache_clean
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
@@ -198,6 +199,7 @@ class MiniCPMV26Model(PytorchChatModel):
198
199
  msgs.append({"role": "user", "content": images_chat + [content]})
199
200
  return msgs, video_existed
200
201
 
202
+ @cache_clean
201
203
  def chat(
202
204
  self,
203
205
  messages: List[Dict],
@@ -24,6 +24,7 @@ from ...utils import select_device
24
24
  from ..llm_family import LLMFamilyV1, LLMSpecV1
25
25
  from ..utils import generate_chat_completion, parse_messages
26
26
  from .core import PytorchChatModel, PytorchGenerateConfig
27
+ from .utils import cache_clean
27
28
 
28
29
  logger = logging.getLogger(__name__)
29
30
 
@@ -87,6 +88,7 @@ class OmniLMMModel(PytorchChatModel):
87
88
  return images, other_content
88
89
  return [], [{"type": "text", "text": content}]
89
90
 
91
+ @cache_clean
90
92
  def chat(
91
93
  self,
92
94
  messages: List[Dict],
@@ -14,16 +14,22 @@
14
14
  import logging
15
15
  import uuid
16
16
  from io import BytesIO
17
- from typing import Dict, Iterator, List, Optional, Union
17
+ from typing import Iterator, List, Optional, Union
18
18
  from urllib.request import urlopen
19
19
 
20
20
  import numpy as np
21
21
 
22
22
  from ....model.utils import select_device
23
- from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
23
+ from ....types import (
24
+ ChatCompletion,
25
+ ChatCompletionChunk,
26
+ ChatCompletionMessage,
27
+ CompletionChunk,
28
+ )
24
29
  from ..llm_family import LLMFamilyV1, LLMSpecV1
25
30
  from ..utils import generate_chat_completion, generate_completion_chunk
26
31
  from .core import PytorchChatModel, PytorchGenerateConfig
32
+ from .utils import cache_clean
27
33
 
28
34
  logger = logging.getLogger(__name__)
29
35
 
@@ -68,7 +74,7 @@ class Qwen2AudioChatModel(PytorchChatModel):
68
74
 
69
75
  def _transform_messages(
70
76
  self,
71
- messages: List[Dict],
77
+ messages: List[ChatCompletionMessage],
72
78
  ):
73
79
  import librosa
74
80
 
@@ -89,9 +95,10 @@ class Qwen2AudioChatModel(PytorchChatModel):
89
95
 
90
96
  return text, audios
91
97
 
98
+ @cache_clean
92
99
  def chat(
93
100
  self,
94
- messages: List[Dict],
101
+ messages: List[ChatCompletionMessage],
95
102
  generate_config: Optional[PytorchGenerateConfig] = None,
96
103
  ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
97
104
  text, audios = self._transform_messages(messages)
@@ -27,6 +27,7 @@ from ....types import (
27
27
  from ..llm_family import LLMFamilyV1, LLMSpecV1
28
28
  from ..utils import generate_chat_completion, generate_completion_chunk
29
29
  from .core import PytorchChatModel, PytorchGenerateConfig
30
+ from .utils import cache_clean
30
31
 
31
32
  logger = logging.getLogger(__name__)
32
33
 
@@ -75,34 +76,7 @@ class Qwen2VLChatModel(PytorchChatModel):
75
76
  self.model_path, device_map=device, trust_remote_code=True
76
77
  ).eval()
77
78
 
78
- def _transform_messages(
79
- self,
80
- messages: List[ChatCompletionMessage],
81
- ):
82
- transformed_messages = []
83
- for msg in messages:
84
- new_content = []
85
- role = msg["role"]
86
- content = msg["content"]
87
- if isinstance(content, str):
88
- new_content.append({"type": "text", "text": content})
89
- elif isinstance(content, List):
90
- for item in content: # type: ignore
91
- if "text" in item:
92
- new_content.append({"type": "text", "text": item["text"]})
93
- elif "image_url" in item:
94
- new_content.append(
95
- {"type": "image", "image": item["image_url"]["url"]}
96
- )
97
- elif "video_url" in item:
98
- new_content.append(
99
- {"type": "video", "video": item["video_url"]["url"]}
100
- )
101
- new_message = {"role": role, "content": new_content}
102
- transformed_messages.append(new_message)
103
-
104
- return transformed_messages
105
-
79
+ @cache_clean
106
80
  def chat(
107
81
  self,
108
82
  messages: List[ChatCompletionMessage], # type: ignore
@@ -28,7 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
28
28
  from ..llm_family import LLMFamilyV1, LLMSpecV1
29
29
  from ..utils import generate_chat_completion, generate_completion_chunk
30
30
  from .core import PytorchChatModel, PytorchGenerateConfig
31
- from .utils import pad_prefill_tokens
31
+ from .utils import cache_clean, pad_prefill_tokens
32
32
 
33
33
  logger = logging.getLogger(__name__)
34
34
 
@@ -137,6 +137,7 @@ class QwenVLChatModel(PytorchChatModel):
137
137
  prompt = self._message_content_to_qwen(messages[-1]["content"])
138
138
  return prompt, qwen_history
139
139
 
140
+ @cache_clean
140
141
  def chat(
141
142
  self,
142
143
  messages: List[Dict],
@@ -11,7 +11,8 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
-
14
+ import asyncio
15
+ import functools
15
16
  import gc
16
17
  import logging
17
18
  import os
@@ -126,7 +127,8 @@ def generate_stream(
126
127
  echo = bool(generate_config.get("echo", False))
127
128
  stop_str = generate_config.get("stop", None)
128
129
  stop_token_ids = generate_config.get("stop_token_ids", None) or []
129
- stop_token_ids.append(tokenizer.eos_token_id)
130
+ if tokenizer.eos_token_id not in stop_token_ids:
131
+ stop_token_ids.append(tokenizer.eos_token_id)
130
132
  chunk_id = str(uuid.uuid4())
131
133
 
132
134
  logits_processor = prepare_logits_processor(
@@ -776,3 +778,34 @@ def batch_inference_one_step(
776
778
  for r in req_list:
777
779
  r.stopped = True
778
780
  r.error_msg = str(e)
781
+
782
+
783
+ def cache_clean(fn):
784
+ @functools.wraps(fn)
785
+ async def _async_wrapper(self, *args, **kwargs):
786
+ import gc
787
+
788
+ from ....device_utils import empty_cache
789
+
790
+ result = await fn(self, *args, **kwargs)
791
+
792
+ gc.collect()
793
+ empty_cache()
794
+ return result
795
+
796
+ @functools.wraps(fn)
797
+ def _wrapper(self, *args, **kwargs):
798
+ import gc
799
+
800
+ from ....device_utils import empty_cache
801
+
802
+ result = fn(self, *args, **kwargs)
803
+
804
+ gc.collect()
805
+ empty_cache()
806
+ return result
807
+
808
+ if asyncio.iscoroutinefunction(fn):
809
+ return _async_wrapper
810
+ else:
811
+ return _wrapper