xinference 0.15.3__py3-none-any.whl → 0.15.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of xinference might be problematic.

Files changed (43)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +29 -2
  3. xinference/client/restful/restful_client.py +10 -0
  4. xinference/constants.py +4 -0
  5. xinference/core/image_interface.py +76 -23
  6. xinference/core/model.py +80 -39
  7. xinference/core/progress_tracker.py +187 -0
  8. xinference/core/supervisor.py +11 -0
  9. xinference/core/worker.py +1 -0
  10. xinference/model/audio/chattts.py +2 -1
  11. xinference/model/audio/core.py +0 -2
  12. xinference/model/audio/model_spec.json +8 -0
  13. xinference/model/audio/model_spec_modelscope.json +9 -0
  14. xinference/model/image/core.py +6 -7
  15. xinference/model/image/sdapi.py +35 -4
  16. xinference/model/image/stable_diffusion/core.py +208 -78
  17. xinference/model/llm/llm_family.json +16 -16
  18. xinference/model/llm/llm_family_modelscope.json +16 -12
  19. xinference/model/llm/transformers/cogvlm2.py +2 -1
  20. xinference/model/llm/transformers/cogvlm2_video.py +2 -0
  21. xinference/model/llm/transformers/core.py +6 -2
  22. xinference/model/llm/transformers/deepseek_vl.py +2 -0
  23. xinference/model/llm/transformers/glm4v.py +2 -1
  24. xinference/model/llm/transformers/intern_vl.py +2 -0
  25. xinference/model/llm/transformers/minicpmv25.py +2 -0
  26. xinference/model/llm/transformers/minicpmv26.py +2 -0
  27. xinference/model/llm/transformers/omnilmm.py +2 -0
  28. xinference/model/llm/transformers/qwen2_audio.py +11 -4
  29. xinference/model/llm/transformers/qwen2_vl.py +2 -28
  30. xinference/model/llm/transformers/qwen_vl.py +2 -1
  31. xinference/model/llm/transformers/utils.py +35 -2
  32. xinference/model/llm/transformers/yi_vl.py +2 -0
  33. xinference/model/llm/utils.py +58 -14
  34. xinference/model/llm/vllm/core.py +52 -8
  35. xinference/model/llm/vllm/utils.py +0 -1
  36. xinference/model/utils.py +7 -4
  37. xinference/model/video/core.py +0 -2
  38. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/METADATA +3 -3
  39. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/RECORD +43 -42
  40. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
  41. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
  42. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
  43. {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json
@@ -1111,7 +1111,8 @@
  "th"
  ],
  "model_ability": [
- "chat"
+ "chat",
+ "tools"
  ],
  "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
  "model_specs": [
@@ -1299,14 +1300,16 @@
  "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
  }
  ],
- "chat_template": "{{- '<|begin_of_text|>' }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
  "stop_token_ids": [
  128001,
+ 128008,
  128009
  ],
  "stop": [
  "<|end_of_text|>",
- "<|eot_id|>"
+ "<|eot_id|>",
+ "<|eom_id|>"
  ]
  },
  {
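The template rewrite above swaps the hard-coded `'<|begin_of_text|>'` literal for the `bos_token` variable and stores newline escapes (`\n` → `\\n`) in the JSON string instead of raw newlines. A change like this can be sanity-checked by rendering the entry directly with jinja2. A minimal sketch, assuming jinja2 is installed and that the family entry is named `llama-3.1-instruct` (the name and path here are illustrative):

```python
import json

from jinja2 import Environment

# Load the built-in family registry and pick the Llama 3.1 entry.
with open("xinference/model/llm/llm_family.json") as fp:
    families = json.load(fp)
spec = next(f for f in families if f["model_name"] == "llama-3.1-instruct")

# bos_token is now a template variable instead of a hard-coded literal,
# so it must be supplied at render time.
template = Environment().from_string(spec["chat_template"])
print(
    template.render(
        messages=[{"role": "user", "content": "Hello!"}],
        bos_token="<|begin_of_text|>",
        add_generation_prompt=True,
    )
)
```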
@@ -6906,18 +6909,15 @@
  "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
  }
  ],
- "prompt_style":{
- "style_name":"QWEN",
- "system_prompt":"You are a helpful assistant",
- "roles":[
- "user",
- "assistant"
- ],
- "stop": [
- "<|im_end|>",
- "<|endoftext|>"
- ]
- }
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+ "stop_token_ids": [
+ 151645,
+ 151643
+ ],
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
  },
  {
  "version": 1,
@@ -7925,7 +7925,7 @@
  }
  }
  ],
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
  "stop_token_ids": [
  151643,
  151644,
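The only change in this hunk is subtle: two characters. Inside a Jinja string literal, `{{` and `}}` are plain text, so the old template literally told the model to answer with `{{"name": ...}}`, which is not valid JSON. A small illustration:

```python
from jinja2 import Template

# Braces inside a Jinja string literal are emitted verbatim,
# not treated as expression delimiters.
old = Template('{{ "<tool_call>\\n{{\\"name\\": fn}}\\n</tool_call>" }}').render()
new = Template('{{ "<tool_call>\\n{\\"name\\": fn}\\n</tool_call>" }}').render()
print(old)  # <tool_call> / {{"name": fn}} / </tool_call>  -- doubled braces
print(new)  # <tool_call> / {"name": fn} / </tool_call>    -- valid JSON shape
```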
xinference/model/llm/llm_family_modelscope.json
@@ -246,7 +246,8 @@
  "th"
  ],
  "model_ability": [
- "chat"
+ "chat",
+ "tools"
  ],
  "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
  "model_specs": [
@@ -350,14 +351,16 @@
  "model_hub": "modelscope"
  }
  ],
- "chat_template": "{{- '<|begin_of_text|>' }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\n\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\n\" }}\n{{- \"Today Date: \" + date_string + \"\n\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\n\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\n\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}\n",
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
  "stop_token_ids": [
  128001,
+ 128008,
  128009
  ],
  "stop": [
  "<|end_of_text|>",
- "<|eot_id|>"
+ "<|eot_id|>",
+ "<|eom_id|>"
  ]
  },
  {
@@ -4624,14 +4627,15 @@
  "model_hub": "modelscope"
  }
  ],
- "prompt_style": {
- "style_name": "QWEN",
- "system_prompt": "You are a helpful assistant",
- "roles": [
- "user",
- "assistant"
- ]
- }
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+ "stop_token_ids": [
+ 151645,
+ 151643
+ ],
+ "stop": [
+ "<|im_end|>",
+ "<|endoftext|>"
+ ]
  },
  {
  "version": 1,
@@ -5679,7 +5683,7 @@
  }
  }
  ],
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
  "stop_token_ids": [
  151643,
  151644,
xinference/model/llm/transformers/cogvlm2.py
@@ -29,7 +29,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import get_max_src_len
+ from .utils import cache_clean, get_max_src_len

  logger = logging.getLogger(__name__)

@@ -176,6 +176,7 @@ class CogVLM2Model(PytorchChatModel):
          query = content
          return query, image, history

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/cogvlm2_video.py
@@ -28,6 +28,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -227,6 +228,7 @@ class CogVLM2VideoModel(PytorchChatModel):

          return query, image, video, history

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/core.py
@@ -40,7 +40,7 @@ from ....types import (
  from ...utils import select_device
  from ..core import LLM
  from ..llm_family import LLMFamilyV1, LLMSpecV1
- from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+ from ..utils import LLAMA3_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
  from .utils import get_context_length, get_max_src_len, pad_prefill_tokens

  logger = logging.getLogger(__name__)
@@ -733,7 +733,11 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
          tools = generate_config.pop("tools", []) if generate_config else None
          model_family = self.model_family.model_family or self.model_family.model_name
          full_context_kwargs = {}
-         if tools and model_family in QWEN_TOOL_CALL_FAMILY:
+         if (
+             tools
+             and model_family in QWEN_TOOL_CALL_FAMILY
+             or model_family in LLAMA3_TOOL_CALL_FAMILY
+         ):
              full_context_kwargs["tools"] = tools
          assert self.model_family.chat_template is not None
          full_prompt = self.get_full_context(
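One subtlety when reading the new condition: Python binds `and` tighter than `or`, so `tools and A or B` parses as `(tools and A) or B`. As written, the Llama 3 arm therefore fires for any `LLAMA3_TOOL_CALL_FAMILY` model even when `tools` is empty. A two-line demonstration:

```python
# `and` binds tighter than `or`: `tools and a or b` == `(tools and a) or b`.
tools = []  # no tools supplied
in_qwen_family, in_llama3_family = False, True

print(tools and in_qwen_family or in_llama3_family)    # True
print(tools and (in_qwen_family or in_llama3_family))  # [] (falsy) with explicit parens
```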
xinference/model/llm/transformers/deepseek_vl.py
@@ -28,6 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import generate_chat_completion, generate_completion_chunk
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -137,6 +138,7 @@ class DeepSeekVLChatModel(PytorchChatModel):
              return "".join(new_content), images
          return content, []

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/glm4v.py
@@ -26,7 +26,7 @@ from ...utils import select_device
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import _decode_image, generate_chat_completion, generate_completion_chunk
  from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import get_max_src_len
+ from .utils import cache_clean, get_max_src_len

  logger = logging.getLogger(__name__)

@@ -129,6 +129,7 @@ class Glm4VModel(PytorchChatModel):
              res.append({"role": role, "content": text})
          return res

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/intern_vl.py
@@ -27,6 +27,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -326,6 +327,7 @@ class InternVLChatModel(PytorchChatModel):
              use_fast=False,
          )

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/minicpmv25.py
@@ -29,6 +29,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -119,6 +120,7 @@ class MiniCPMV25Model(PytorchChatModel):
              raise RuntimeError("Only one image per message is supported")
          return content, []

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/minicpmv26.py
@@ -30,6 +30,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -198,6 +199,7 @@ class MiniCPMV26Model(PytorchChatModel):
              msgs.append({"role": "user", "content": images_chat + [content]})
          return msgs, video_existed

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/omnilmm.py
@@ -24,6 +24,7 @@ from ...utils import select_device
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import generate_chat_completion, parse_messages
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -87,6 +88,7 @@ class OmniLMMModel(PytorchChatModel):
              return images, other_content
          return [], [{"type": "text", "text": content}]

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/qwen2_audio.py
@@ -14,16 +14,22 @@
  import logging
  import uuid
  from io import BytesIO
- from typing import Dict, Iterator, List, Optional, Union
+ from typing import Iterator, List, Optional, Union
  from urllib.request import urlopen

  import numpy as np

  from ....model.utils import select_device
- from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
+ from ....types import (
+     ChatCompletion,
+     ChatCompletionChunk,
+     ChatCompletionMessage,
+     CompletionChunk,
+ )
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import generate_chat_completion, generate_completion_chunk
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -68,7 +74,7 @@ class Qwen2AudioChatModel(PytorchChatModel):

      def _transform_messages(
          self,
-         messages: List[Dict],
+         messages: List[ChatCompletionMessage],
      ):
          import librosa

@@ -89,9 +95,10 @@ class Qwen2AudioChatModel(PytorchChatModel):

          return text, audios

+     @cache_clean
      def chat(
          self,
-         messages: List[Dict],
+         messages: List[ChatCompletionMessage],
          generate_config: Optional[PytorchGenerateConfig] = None,
      ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
          text, audios = self._transform_messages(messages)
xinference/model/llm/transformers/qwen2_vl.py
@@ -27,6 +27,7 @@ from ....types import (
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import generate_chat_completion, generate_completion_chunk
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -75,34 +76,7 @@ class Qwen2VLChatModel(PytorchChatModel):
              self.model_path, device_map=device, trust_remote_code=True
          ).eval()

-     def _transform_messages(
-         self,
-         messages: List[ChatCompletionMessage],
-     ):
-         transformed_messages = []
-         for msg in messages:
-             new_content = []
-             role = msg["role"]
-             content = msg["content"]
-             if isinstance(content, str):
-                 new_content.append({"type": "text", "text": content})
-             elif isinstance(content, List):
-                 for item in content:  # type: ignore
-                     if "text" in item:
-                         new_content.append({"type": "text", "text": item["text"]})
-                     elif "image_url" in item:
-                         new_content.append(
-                             {"type": "image", "image": item["image_url"]["url"]}
-                         )
-                     elif "video_url" in item:
-                         new_content.append(
-                             {"type": "video", "video": item["video_url"]["url"]}
-                         )
-             new_message = {"role": role, "content": new_content}
-             transformed_messages.append(new_message)
-
-         return transformed_messages
-
+     @cache_clean
      def chat(
          self,
          messages: List[ChatCompletionMessage],  # type: ignore
xinference/model/llm/transformers/qwen_vl.py
@@ -28,7 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
  from ..llm_family import LLMFamilyV1, LLMSpecV1
  from ..utils import generate_chat_completion, generate_completion_chunk
  from .core import PytorchChatModel, PytorchGenerateConfig
- from .utils import pad_prefill_tokens
+ from .utils import cache_clean, pad_prefill_tokens

  logger = logging.getLogger(__name__)

@@ -137,6 +137,7 @@ class QwenVLChatModel(PytorchChatModel):
          prompt = self._message_content_to_qwen(messages[-1]["content"])
          return prompt, qwen_history

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],
xinference/model/llm/transformers/utils.py
@@ -11,7 +11,8 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
+ import asyncio
+ import functools
  import gc
  import logging
  import os
@@ -126,7 +127,8 @@ def generate_stream(
      echo = bool(generate_config.get("echo", False))
      stop_str = generate_config.get("stop", None)
      stop_token_ids = generate_config.get("stop_token_ids", None) or []
-     stop_token_ids.append(tokenizer.eos_token_id)
+     if tokenizer.eos_token_id not in stop_token_ids:
+         stop_token_ids.append(tokenizer.eos_token_id)
      chunk_id = str(uuid.uuid4())

      logits_processor = prepare_logits_processor(
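The new guard matters because `stop_token_ids` is taken from `generate_config` by reference, so the old unconditional append added one more EOS id to the caller's list on every generation:

```python
stop_token_ids = [128001]  # list shared through generate_config
eos_token_id = 128009

for _ in range(3):  # three generations against the same config
    if eos_token_id not in stop_token_ids:  # the new guard
        stop_token_ids.append(eos_token_id)

print(stop_token_ids)  # [128001, 128009] -- without the guard, 128009 would appear 3 times
```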
@@ -776,3 +778,34 @@ def batch_inference_one_step(
          for r in req_list:
              r.stopped = True
              r.error_msg = str(e)
+
+
+ def cache_clean(fn):
+     @functools.wraps(fn)
+     async def _async_wrapper(self, *args, **kwargs):
+         import gc
+
+         from ....device_utils import empty_cache
+
+         result = await fn(self, *args, **kwargs)
+
+         gc.collect()
+         empty_cache()
+         return result
+
+     @functools.wraps(fn)
+     def _wrapper(self, *args, **kwargs):
+         import gc
+
+         from ....device_utils import empty_cache
+
+         result = fn(self, *args, **kwargs)
+
+         gc.collect()
+         empty_cache()
+         return result
+
+     if asyncio.iscoroutinefunction(fn):
+         return _async_wrapper
+     else:
+         return _wrapper
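`cache_clean` supports both sync and async methods: `asyncio.iscoroutinefunction` selects the matching wrapper at decoration time, and each wrapper runs `gc.collect()` plus `empty_cache()` after the wrapped call returns. A usage sketch (the class below is illustrative; in the release the decorator is applied to the `chat()` methods shown in the other hunks, and its relative `device_utils` import means it only resolves inside the xinference package):

```python
import asyncio

class DemoModel:
    @cache_clean
    def chat(self, messages):
        return {"role": "assistant", "content": "hi"}  # ... inference here ...

    @cache_clean
    async def achat(self, messages):
        return {"role": "assistant", "content": "hi"}  # ... inference here ...

model = DemoModel()
model.chat([{"role": "user", "content": "hello"}])                # sync path
asyncio.run(model.achat([{"role": "user", "content": "hello"}]))  # async path
# In both cases the accelerator cache is emptied after the call completes.
```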
xinference/model/llm/transformers/yi_vl.py
@@ -29,6 +29,7 @@ from ..utils import (
      parse_messages,
  )
  from .core import PytorchChatModel, PytorchGenerateConfig
+ from .utils import cache_clean

  logger = logging.getLogger(__name__)

@@ -99,6 +100,7 @@ class YiVLChatModel(PytorchChatModel):
              raise RuntimeError("Only one image per message is supported by Yi VL.")
          return content

+     @cache_clean
      def chat(
          self,
          messages: List[Dict],