xinference 0.15.3__py3-none-any.whl → 0.15.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +29 -2
- xinference/client/restful/restful_client.py +10 -0
- xinference/constants.py +4 -0
- xinference/core/image_interface.py +76 -23
- xinference/core/model.py +80 -39
- xinference/core/progress_tracker.py +187 -0
- xinference/core/supervisor.py +11 -0
- xinference/core/worker.py +1 -0
- xinference/model/audio/chattts.py +2 -1
- xinference/model/audio/core.py +0 -2
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/image/core.py +6 -7
- xinference/model/image/sdapi.py +35 -4
- xinference/model/image/stable_diffusion/core.py +208 -78
- xinference/model/llm/llm_family.json +16 -16
- xinference/model/llm/llm_family_modelscope.json +16 -12
- xinference/model/llm/transformers/cogvlm2.py +2 -1
- xinference/model/llm/transformers/cogvlm2_video.py +2 -0
- xinference/model/llm/transformers/core.py +6 -2
- xinference/model/llm/transformers/deepseek_vl.py +2 -0
- xinference/model/llm/transformers/glm4v.py +2 -1
- xinference/model/llm/transformers/intern_vl.py +2 -0
- xinference/model/llm/transformers/minicpmv25.py +2 -0
- xinference/model/llm/transformers/minicpmv26.py +2 -0
- xinference/model/llm/transformers/omnilmm.py +2 -0
- xinference/model/llm/transformers/qwen2_audio.py +11 -4
- xinference/model/llm/transformers/qwen2_vl.py +2 -28
- xinference/model/llm/transformers/qwen_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +35 -2
- xinference/model/llm/transformers/yi_vl.py +2 -0
- xinference/model/llm/utils.py +58 -14
- xinference/model/llm/vllm/core.py +52 -8
- xinference/model/llm/vllm/utils.py +0 -1
- xinference/model/utils.py +7 -4
- xinference/model/video/core.py +0 -2
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/METADATA +3 -3
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/RECORD +43 -42
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.3.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -1111,7 +1111,8 @@
       "th"
     ],
     "model_ability": [
-      "chat"
+      "chat",
+      "tools"
     ],
     "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
     "model_specs": [
@@ -1299,14 +1300,16 @@
         "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
       }
     ],
-    "chat_template": "{{-
+    "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       128001,
+      128008,
       128009
     ],
     "stop": [
       "<|end_of_text|>",
-      "<|eot_id|>"
+      "<|eot_id|>",
+      "<|eom_id|>"
     ]
   },
   {
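The two hunks above give llama-3.1-instruct the new "tools" ability and a chat template whose tools branch emits Llama 3.1 style function-call JSON. A hypothetical client-side sketch of what this enables; the endpoint, the model uid, and the exact chat() signature are assumptions, not taken from this diff:

    # Sketch only: assumes a llama-3.1-instruct model has been launched and
    # that the 0.15.x RESTful client exposes a messages/tools-style chat().
    from xinference.client import RESTfulClient

    client = RESTfulClient("http://127.0.0.1:9997")
    model = client.get_model("llama-3.1-instruct")  # hypothetical model uid

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]

    completion = model.chat(
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=tools,
    )
    # With the new template, a tool-using reply should surface as tool_calls.
    print(completion["choices"][0]["message"].get("tool_calls"))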
@@ -6906,18 +6909,15 @@
         "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
       }
     ],
-    "
-
-
-
-
-
-    "
-
-        "<|endoftext|>"
-      ]
-    }
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
   },
   {
     "version": 1,
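The replacement block above moves qwen2-vl-instruct from the old prompt-style fields (shown truncated by the diff viewer) to an inline Jinja chat template plus explicit stop tokens. Hand-tracing that template for one user message carrying an image part and a text part, assuming add_generation_prompt=True and add_vision_id unset:

    # Input shape the template's content loop expects:
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]
    # Hand-traced rendering (the default system preamble is injected because
    # the first message is not a system message):
    #   <|im_start|>system
    #   You are a helpful assistant.<|im_end|>
    #   <|im_start|>user
    #   <|vision_start|><|image_pad|><|vision_end|>Describe this image.<|im_end|>
    #   <|im_start|>assistant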
@@ -7925,7 +7925,7 @@
         }
       }
     ],
-    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       151643,
       151644,
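The removed 0.15.3 line is truncated by the diff viewer, so only the 0.15.4 template is fully visible. Hand-tracing its tools branch, the rendered system block ends with a complete function-call instruction:

    # Hand-traced tail of the system block the new template emits when `tools`
    # is non-empty; <function-name> and <args-json-object> are literal
    # placeholders the model is asked to fill in its reply:
    EXPECTED_SYSTEM_TAIL = (
        "For each function call, return a json object with function name and "
        "arguments within <tool_call></tool_call> XML tags:\n"
        "<tool_call>\n"
        '{"name": <function-name>, "arguments": <args-json-object>}\n'
        "</tool_call><|im_end|>\n"
    )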
xinference/model/llm/llm_family_modelscope.json

@@ -246,7 +246,8 @@
       "th"
     ],
     "model_ability": [
-      "chat"
+      "chat",
+      "tools"
     ],
     "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
     "model_specs": [
@@ -350,14 +351,16 @@
         "model_hub": "modelscope"
       }
     ],
-    "chat_template": "{{-
+    "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       128001,
+      128008,
       128009
     ],
     "stop": [
       "<|end_of_text|>",
-      "<|eot_id|>"
+      "<|eot_id|>",
+      "<|eom_id|>"
     ]
   },
   {
@@ -4624,14 +4627,15 @@
         "model_hub": "modelscope"
       }
     ],
-    "
-
-
-
-
-
-
-
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+    "stop_token_ids": [
+      151645,
+      151643
+    ],
+    "stop": [
+      "<|im_end|>",
+      "<|endoftext|>"
+    ]
   },
   {
     "version": 1,
@@ -5679,7 +5683,7 @@
         }
       }
     ],
-    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       151643,
       151644,
xinference/model/llm/transformers/cogvlm2.py

@@ -29,7 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len
 
 logger = logging.getLogger(__name__)
 
@@ -176,6 +176,7 @@ class CogVLM2Model(PytorchChatModel):
             query = content
         return query, image, history
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/cogvlm2_video.py

@@ -28,6 +28,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -227,6 +228,7 @@ class CogVLM2VideoModel(PytorchChatModel):
 
         return query, image, video, history
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/core.py

@@ -40,7 +40,7 @@ from ....types import (
 from ...utils import select_device
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import LLAMA3_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 from .utils import get_context_length, get_max_src_len, pad_prefill_tokens
 
 logger = logging.getLogger(__name__)
@@ -733,7 +733,11 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
         model_family = self.model_family.model_family or self.model_family.model_name
         full_context_kwargs = {}
-        if tools and model_family in QWEN_TOOL_CALL_FAMILY:
+        if (
+            tools
+            and model_family in QWEN_TOOL_CALL_FAMILY
+            or model_family in LLAMA3_TOOL_CALL_FAMILY
+        ):
             full_context_kwargs["tools"] = tools
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
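The rewritten condition leans on Python operator precedence: `and` binds tighter than `or`, so it groups as `(tools and model_family in QWEN_TOOL_CALL_FAMILY) or model_family in LLAMA3_TOOL_CALL_FAMILY`, meaning the Llama 3 branch passes `tools` through even when the list is empty. A standalone sketch of the grouping, with hypothetical family contents:

    # Hypothetical family contents, for illustration only.
    QWEN_TOOL_CALL_FAMILY = {"qwen2.5-instruct"}
    LLAMA3_TOOL_CALL_FAMILY = {"llama-3.1-instruct"}

    def passes(tools, model_family):
        # Same shape as the condition in the hunk above.
        return bool(
            tools
            and model_family in QWEN_TOOL_CALL_FAMILY
            or model_family in LLAMA3_TOOL_CALL_FAMILY
        )

    assert passes([], "llama-3.1-instruct") is True   # llama3 branch ignores `tools`
    assert passes([{"type": "function"}], "qwen2.5-instruct") is True
    assert passes([], "qwen2.5-instruct") is False    # qwen branch requires `tools`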
xinference/model/llm/transformers/deepseek_vl.py

@@ -28,6 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -137,6 +138,7 @@ class DeepSeekVLChatModel(PytorchChatModel):
             return "".join(new_content), images
         return content, []
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/glm4v.py

@@ -26,7 +26,7 @@ from ...utils import select_device
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import _decode_image, generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len
 
 logger = logging.getLogger(__name__)
 
@@ -129,6 +129,7 @@ class Glm4VModel(PytorchChatModel):
             res.append({"role": role, "content": text})
         return res
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/intern_vl.py

@@ -27,6 +27,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -326,6 +327,7 @@ class InternVLChatModel(PytorchChatModel):
             use_fast=False,
         )
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/minicpmv25.py

@@ -29,6 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -119,6 +120,7 @@ class MiniCPMV25Model(PytorchChatModel):
             raise RuntimeError("Only one image per message is supported")
         return content, []
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/minicpmv26.py

@@ -30,6 +30,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -198,6 +199,7 @@ class MiniCPMV26Model(PytorchChatModel):
             msgs.append({"role": "user", "content": images_chat + [content]})
         return msgs, video_existed
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/omnilmm.py

@@ -24,6 +24,7 @@ from ...utils import select_device
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, parse_messages
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -87,6 +88,7 @@ class OmniLMMModel(PytorchChatModel):
             return images, other_content
         return [], [{"type": "text", "text": content}]
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/qwen2_audio.py

@@ -14,16 +14,22 @@
 import logging
 import uuid
 from io import BytesIO
-from typing import
+from typing import Iterator, List, Optional, Union
 from urllib.request import urlopen
 
 import numpy as np
 
 from ....model.utils import select_device
-from ....types import
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    CompletionChunk,
+)
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -68,7 +74,7 @@ class Qwen2AudioChatModel(PytorchChatModel):
 
     def _transform_messages(
         self,
-        messages: List[
+        messages: List[ChatCompletionMessage],
     ):
         import librosa
 
@@ -89,9 +95,10 @@ class Qwen2AudioChatModel(PytorchChatModel):
 
         return text, audios
 
+    @cache_clean
     def chat(
         self,
-        messages: List[
+        messages: List[ChatCompletionMessage],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         text, audios = self._transform_messages(messages)
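The widened imports (BytesIO, urlopen, numpy, plus the in-method librosa import) fit the usual Qwen2-Audio pattern of fetching each audio URL and decoding it in memory. A minimal sketch of that pattern; the 16 kHz target rate and the URL are assumptions — the real model would take the rate from its processor's feature extractor:

    from io import BytesIO
    from urllib.request import urlopen

    import librosa

    def fetch_audio(url: str, sampling_rate: int = 16000):
        # Download the raw bytes, then decode and resample entirely in memory.
        raw = BytesIO(urlopen(url).read())
        audio, _sr = librosa.load(raw, sr=sampling_rate)
        return audio

    # Hypothetical URL, for illustration only.
    audios = [fetch_audio("https://example.com/speech.wav")]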
xinference/model/llm/transformers/qwen2_vl.py

@@ -27,6 +27,7 @@ from ....types import (
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -75,34 +76,7 @@ class Qwen2VLChatModel(PytorchChatModel):
             self.model_path, device_map=device, trust_remote_code=True
         ).eval()
 
-    def _transform_messages(
-        self,
-        messages: List[ChatCompletionMessage],
-    ):
-        transformed_messages = []
-        for msg in messages:
-            new_content = []
-            role = msg["role"]
-            content = msg["content"]
-            if isinstance(content, str):
-                new_content.append({"type": "text", "text": content})
-            elif isinstance(content, List):
-                for item in content:  # type: ignore
-                    if "text" in item:
-                        new_content.append({"type": "text", "text": item["text"]})
-                    elif "image_url" in item:
-                        new_content.append(
-                            {"type": "image", "image": item["image_url"]["url"]}
-                        )
-                    elif "video_url" in item:
-                        new_content.append(
-                            {"type": "video", "video": item["video_url"]["url"]}
-                        )
-            new_message = {"role": role, "content": new_content}
-            transformed_messages.append(new_message)
-
-        return transformed_messages
-
+    @cache_clean
     def chat(
         self,
         messages: List[ChatCompletionMessage],  # type: ignore
xinference/model/llm/transformers/qwen_vl.py

@@ -28,7 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import pad_prefill_tokens
+from .utils import cache_clean, pad_prefill_tokens
 
 logger = logging.getLogger(__name__)
 
@@ -137,6 +137,7 @@ class QwenVLChatModel(PytorchChatModel):
         prompt = self._message_content_to_qwen(messages[-1]["content"])
         return prompt, qwen_history
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/utils.py

@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import asyncio
+import functools
 import gc
 import logging
 import os
@@ -126,7 +127,8 @@ def generate_stream(
     echo = bool(generate_config.get("echo", False))
     stop_str = generate_config.get("stop", None)
     stop_token_ids = generate_config.get("stop_token_ids", None) or []
-
+    if tokenizer.eos_token_id not in stop_token_ids:
+        stop_token_ids.append(tokenizer.eos_token_id)
    chunk_id = str(uuid.uuid4())
 
     logits_processor = prepare_logits_processor(
@@ -776,3 +778,34 @@ def batch_inference_one_step(
         for r in req_list:
             r.stopped = True
             r.error_msg = str(e)
+
+
+def cache_clean(fn):
+    @functools.wraps(fn)
+    async def _async_wrapper(self, *args, **kwargs):
+        import gc
+
+        from ....device_utils import empty_cache
+
+        result = await fn(self, *args, **kwargs)
+
+        gc.collect()
+        empty_cache()
+        return result
+
+    @functools.wraps(fn)
+    def _wrapper(self, *args, **kwargs):
+        import gc
+
+        from ....device_utils import empty_cache
+
+        result = fn(self, *args, **kwargs)
+
+        gc.collect()
+        empty_cache()
+        return result
+
+    if asyncio.iscoroutinefunction(fn):
+        return _async_wrapper
+    else:
+        return _wrapper
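The new cache_clean helper is a single decorator that works on both sync and async chat methods: it inspects the wrapped function with asyncio.iscoroutinefunction and returns the matching wrapper, freeing accelerator memory after each call. A runnable toy of the same pattern, with the gc/empty_cache calls replaced by a print so it has no GPU dependency:

    import asyncio
    import functools

    def cache_clean(fn):
        @functools.wraps(fn)
        async def _async_wrapper(self, *args, **kwargs):
            result = await fn(self, *args, **kwargs)
            print("cleanup after", fn.__name__)  # stand-in for gc.collect() + empty_cache()
            return result

        @functools.wraps(fn)
        def _wrapper(self, *args, **kwargs):
            result = fn(self, *args, **kwargs)
            print("cleanup after", fn.__name__)
            return result

        # Pick the wrapper that matches the wrapped function's flavor.
        return _async_wrapper if asyncio.iscoroutinefunction(fn) else _wrapper

    class Model:
        @cache_clean
        def chat(self, prompt):
            return prompt.upper()

        @cache_clean
        async def achat(self, prompt):
            return prompt.lower()

    m = Model()
    assert m.chat("hi") == "HI"
    assert asyncio.run(m.achat("HI")) == "hi"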
xinference/model/llm/transformers/yi_vl.py

@@ -29,6 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean
 
 logger = logging.getLogger(__name__)
 
@@ -99,6 +100,7 @@ class YiVLChatModel(PytorchChatModel):
             raise RuntimeError("Only one image per message is supported by Yi VL.")
         return content
 
+    @cache_clean
     def chat(
         self,
         messages: List[Dict],