xinference 0.15.2__py3-none-any.whl → 0.15.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +29 -2
- xinference/client/restful/restful_client.py +10 -0
- xinference/constants.py +4 -0
- xinference/core/image_interface.py +76 -23
- xinference/core/model.py +80 -39
- xinference/core/progress_tracker.py +187 -0
- xinference/core/supervisor.py +11 -0
- xinference/core/worker.py +1 -0
- xinference/model/audio/chattts.py +2 -1
- xinference/model/audio/core.py +0 -2
- xinference/model/audio/model_spec.json +8 -0
- xinference/model/audio/model_spec_modelscope.json +9 -0
- xinference/model/embedding/core.py +14 -5
- xinference/model/embedding/model_spec.json +7 -0
- xinference/model/embedding/model_spec_modelscope.json +9 -1
- xinference/model/image/core.py +6 -7
- xinference/model/image/sdapi.py +35 -4
- xinference/model/image/stable_diffusion/core.py +212 -70
- xinference/model/llm/llm_family.json +28 -40
- xinference/model/llm/llm_family_modelscope.json +18 -22
- xinference/model/llm/transformers/cogvlm2.py +2 -1
- xinference/model/llm/transformers/cogvlm2_video.py +2 -0
- xinference/model/llm/transformers/core.py +6 -2
- xinference/model/llm/transformers/deepseek_vl.py +2 -0
- xinference/model/llm/transformers/glm4v.py +2 -1
- xinference/model/llm/transformers/intern_vl.py +2 -0
- xinference/model/llm/transformers/minicpmv25.py +2 -0
- xinference/model/llm/transformers/minicpmv26.py +2 -0
- xinference/model/llm/transformers/omnilmm.py +2 -0
- xinference/model/llm/transformers/qwen2_audio.py +11 -4
- xinference/model/llm/transformers/qwen2_vl.py +2 -28
- xinference/model/llm/transformers/qwen_vl.py +2 -1
- xinference/model/llm/transformers/utils.py +35 -2
- xinference/model/llm/transformers/yi_vl.py +2 -0
- xinference/model/llm/utils.py +72 -17
- xinference/model/llm/vllm/core.py +69 -9
- xinference/model/llm/vllm/utils.py +41 -0
- xinference/model/rerank/core.py +19 -0
- xinference/model/rerank/model_spec.json +8 -0
- xinference/model/rerank/model_spec_modelscope.json +8 -0
- xinference/model/utils.py +7 -29
- xinference/model/video/core.py +0 -2
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.29578905.js → main.e51a356d.js} +3 -3
- xinference/web/ui/build/static/js/main.e51a356d.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/4385c1095eefbff0a8ec3b2964ba6e5a66a05ab31be721483ca2f43e2a91f6ff.json +1 -0
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/METADATA +6 -5
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/RECORD +55 -53
- xinference/web/ui/build/static/js/main.29578905.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/68bede6d95bb5ef0b35bbb3ec5b8c937eaf6862c6cdbddb5ef222a7776aaf336.json +0 -1
- /xinference/web/ui/build/static/js/{main.29578905.js.LICENSE.txt → main.e51a356d.js.LICENSE.txt} +0 -0
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/LICENSE +0 -0
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/WHEEL +0 -0
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/entry_points.txt +0 -0
- {xinference-0.15.2.dist-info → xinference-0.15.4.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -1111,7 +1111,8 @@
             "th"
         ],
         "model_ability": [
-            "chat"
+            "chat",
+            "tools"
         ],
         "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
         "model_specs": [
@@ -1299,14 +1300,16 @@
                 "model_id": "hugging-quants/Meta-Llama-3.1-405B-Instruct-AWQ-INT4"
             }
         ],
-        "chat_template": "{{-
+        "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
         "stop_token_ids": [
             128001,
+            128008,
             128009
         ],
         "stop": [
             "<|end_of_text|>",
-            "<|eot_id|>"
+            "<|eot_id|>",
+            "<|eom_id|>"
         ]
     },
     {
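The "tools" ability added above is what lets clients drive llama-3.1-instruct through tool-calling requests. A minimal sketch (not part of this diff) against Xinference's OpenAI-compatible endpoint, assuming a local server on the default port with the model already launched; the base_url, model name, and the get_weather tool are assumptions:

# Sketch only: tool calling against a launched llama-3.1-instruct.
import openai

client = openai.Client(api_key="not-needed", base_url="http://localhost:9997/v1")

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical tool, for illustration
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="llama-3.1-instruct",
    messages=[{"role": "user", "content": "What is the weather in Paris?"}],
    tools=tools,
)
print(response.choices[0].message.tool_calls)

The new stop entries matter for this flow: Llama 3.1 emits <|eom_id|> (token 128008) when it expects a tool result, so without it generation would run past the tool call.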
@@ -6483,8 +6486,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-1B",
-            "model_revision": "a9fc14aea824b6ea1d44f8778cad6b35512c4ce1"
+            "model_id": "OpenGVLab/InternVL2-1B"
         },
         {
             "model_format": "pytorch",

@@ -6494,8 +6496,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-2B",
-            "model_revision": "422ad7c6335917bfb514958233955512338485a6"
+            "model_id": "OpenGVLab/InternVL2-2B"
         },
         {
             "model_format": "awq",

@@ -6503,8 +6504,7 @@
             "quantizations": [
                 "Int4"
             ],
-            "model_id": "OpenGVLab/InternVL2-2B-AWQ",
-            "model_revision": "701bc3fc098a8a3b686b3b4135cfb77202be89e0"
+            "model_id": "OpenGVLab/InternVL2-2B-AWQ"
         },
         {
             "model_format": "pytorch",

@@ -6514,8 +6514,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-4B",
-            "model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
+            "model_id": "OpenGVLab/InternVL2-4B"
         },
         {
             "model_format": "pytorch",

@@ -6525,8 +6524,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-8B",
-            "model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
+            "model_id": "OpenGVLab/InternVL2-8B"
         },
         {
             "model_format": "awq",

@@ -6534,8 +6532,7 @@
             "quantizations": [
                 "Int4"
             ],
-            "model_id": "OpenGVLab/InternVL2-8B-AWQ",
-            "model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
+            "model_id": "OpenGVLab/InternVL2-8B-AWQ"
         },
         {
             "model_format": "pytorch",

@@ -6545,8 +6542,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-26B",
-            "model_revision": "b9f3c7e6d575b0115e076a3ffc46fd20b7586899"
+            "model_id": "OpenGVLab/InternVL2-26B"
         },
         {
             "model_format": "awq",

@@ -6554,8 +6550,7 @@
             "quantizations": [
                 "Int4"
             ],
-            "model_id": "OpenGVLab/InternVL2-26B-AWQ",
-            "model_revision": "469e0019ffd251e22ff6501a5c2321964e86ef0d"
+            "model_id": "OpenGVLab/InternVL2-26B-AWQ"
         },
         {
             "model_format": "pytorch",

@@ -6565,8 +6560,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-40B",
-            "model_revision": "725a12063bb855c966e30a0617d0ccd9e870d772"
+            "model_id": "OpenGVLab/InternVL2-40B"
         },
         {
             "model_format": "awq",

@@ -6574,8 +6568,7 @@
             "quantizations": [
                 "Int4"
             ],
-            "model_id": "OpenGVLab/InternVL2-40B-AWQ",
-            "model_revision": "d92e140f6dfe8ea9679924c6a31898f42c4e1846"
+            "model_id": "OpenGVLab/InternVL2-40B-AWQ"
         },
         {
             "model_format": "pytorch",

@@ -6585,8 +6578,7 @@
                 "8-bit",
                 "none"
             ],
-            "model_id": "OpenGVLab/InternVL2-Llama3-76B",
-            "model_revision": "cf7914905f78e9e3560ddbd6f5dfc39becac494f"
+            "model_id": "OpenGVLab/InternVL2-Llama3-76B"
         },
         {
             "model_format": "awq",

@@ -6594,8 +6586,7 @@
             "quantizations": [
                 "Int4"
             ],
-            "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
-            "model_revision": "1bc796bf80f2ebc7d6a14c15f55217a4600d50a4"
+            "model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ"
         }
     ],
     "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
@@ -6918,18 +6909,15 @@
             "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
             }
         ],
-        "
-
-
-
-
-
-        "
-
-            "<|endoftext|>"
-        ]
-    }
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
     },
     {
         "version": 1,
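For reference, the Qwen2-VL chat_template added above iterates over structured content lists instead of assuming plain-string messages. A sketch (not from the diff) of a message it can render; the image URL is a placeholder:

# Illustrative only: each image item becomes
# <|vision_start|><|image_pad|><|vision_end|> in the rendered prompt,
# and <|im_end|> (token 151645) now terminates generation.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "https://example.com/demo.jpg"},
            {"type": "text", "text": "Describe this picture."},
        ],
    }
]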
@@ -7937,7 +7925,7 @@
             }
         }
     ],
-    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
         151643,
         151644,
xinference/model/llm/llm_family_modelscope.json

@@ -246,7 +246,8 @@
             "th"
         ],
         "model_ability": [
-            "chat"
+            "chat",
+            "tools"
         ],
         "model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
         "model_specs": [

@@ -350,14 +351,16 @@
                 "model_hub": "modelscope"
             }
         ],
-        "chat_template": "{{-
+        "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
         "stop_token_ids": [
             128001,
+            128008,
             128009
         ],
         "stop": [
             "<|end_of_text|>",
-            "<|eot_id|>"
+            "<|eot_id|>",
+            "<|eom_id|>"
         ]
     },
     {

@@ -4334,16 +4337,8 @@
             }
         ],
         "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-        "stop_token_ids": [
-
-            151644,
-            151645
-        ],
-        "stop": [
-            "<|endoftext|>",
-            "<|im_start|>",
-            "<|im_end|>"
-        ]
+        "stop_token_ids": [],
+        "stop": []
     },
     {
         "version": 1,

@@ -4632,14 +4627,15 @@
                 "model_hub": "modelscope"
             }
         ],
-        "
-
-
-
-
-
-
-
+        "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
+        "stop_token_ids": [
+            151645,
+            151643
+        ],
+        "stop": [
+            "<|im_end|>",
+            "<|endoftext|>"
+        ]
     },
     {
         "version": 1,

@@ -5687,7 +5683,7 @@
             }
         }
     ],
-    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
         151643,
         151644,
xinference/model/llm/transformers/cogvlm2.py

@@ -29,7 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len

 logger = logging.getLogger(__name__)

@@ -176,6 +176,7 @@ class CogVLM2Model(PytorchChatModel):
             query = content
         return query, image, history

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/cogvlm2_video.py

@@ -28,6 +28,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -227,6 +228,7 @@ class CogVLM2VideoModel(PytorchChatModel):

         return query, image, video, history

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/core.py

@@ -40,7 +40,7 @@ from ....types import (
 from ...utils import select_device
 from ..core import LLM
 from ..llm_family import LLMFamilyV1, LLMSpecV1
-from ..utils import QWEN_TOOL_CALL_FAMILY, ChatModelMixin
+from ..utils import LLAMA3_TOOL_CALL_FAMILY, QWEN_TOOL_CALL_FAMILY, ChatModelMixin
 from .utils import get_context_length, get_max_src_len, pad_prefill_tokens

 logger = logging.getLogger(__name__)

@@ -733,7 +733,11 @@ class PytorchChatModel(PytorchModel, ChatModelMixin):
         tools = generate_config.pop("tools", []) if generate_config else None
         model_family = self.model_family.model_family or self.model_family.model_name
         full_context_kwargs = {}
-        if
+        if (
+            tools
+            and model_family in QWEN_TOOL_CALL_FAMILY
+            or model_family in LLAMA3_TOOL_CALL_FAMILY
+        ):
             full_context_kwargs["tools"] = tools
         assert self.model_family.chat_template is not None
         full_prompt = self.get_full_context(
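A note on the rewritten condition above: Python's "and" binds tighter than "or", so the new check groups as in the equivalent sketch below, meaning that for families in LLAMA3_TOOL_CALL_FAMILY the tools key is set whether or not tools is empty:

# Equivalent grouping (illustrative only, not a further code change):
if (tools and model_family in QWEN_TOOL_CALL_FAMILY) or (
    model_family in LLAMA3_TOOL_CALL_FAMILY
):
    full_context_kwargs["tools"] = tools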
xinference/model/llm/transformers/deepseek_vl.py

@@ -28,6 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -137,6 +138,7 @@ class DeepSeekVLChatModel(PytorchChatModel):
             return "".join(new_content), images
         return content, []

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/glm4v.py

@@ -26,7 +26,7 @@ from ...utils import select_device
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import _decode_image, generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import get_max_src_len
+from .utils import cache_clean, get_max_src_len

 logger = logging.getLogger(__name__)

@@ -129,6 +129,7 @@ class Glm4VModel(PytorchChatModel):
             res.append({"role": role, "content": text})
         return res

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/intern_vl.py

@@ -27,6 +27,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -326,6 +327,7 @@ class InternVLChatModel(PytorchChatModel):
             use_fast=False,
         )

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/minicpmv25.py

@@ -29,6 +29,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -119,6 +120,7 @@ class MiniCPMV25Model(PytorchChatModel):
             raise RuntimeError("Only one image per message is supported")
         return content, []

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/minicpmv26.py

@@ -30,6 +30,7 @@ from ..utils import (
     parse_messages,
 )
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -198,6 +199,7 @@ class MiniCPMV26Model(PytorchChatModel):
             msgs.append({"role": "user", "content": images_chat + [content]})
         return msgs, video_existed

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/omnilmm.py

@@ -24,6 +24,7 @@ from ...utils import select_device
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, parse_messages
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -87,6 +88,7 @@ class OmniLMMModel(PytorchChatModel):
             return images, other_content
         return [], [{"type": "text", "text": content}]

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/qwen2_audio.py

@@ -14,16 +14,22 @@
 import logging
 import uuid
 from io import BytesIO
-from typing import
+from typing import Iterator, List, Optional, Union
 from urllib.request import urlopen

 import numpy as np

 from ....model.utils import select_device
-from ....types import
+from ....types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessage,
+    CompletionChunk,
+)
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -68,7 +74,7 @@ class Qwen2AudioChatModel(PytorchChatModel):

     def _transform_messages(
         self,
-        messages: List[
+        messages: List[ChatCompletionMessage],
     ):
         import librosa

@@ -89,9 +95,10 @@ class Qwen2AudioChatModel(PytorchChatModel):

         return text, audios

+    @cache_clean
     def chat(
         self,
-        messages: List[
+        messages: List[ChatCompletionMessage],
         generate_config: Optional[PytorchGenerateConfig] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         text, audios = self._transform_messages(messages)
xinference/model/llm/transformers/qwen2_vl.py

@@ -27,6 +27,7 @@ from ....types import (
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
+from .utils import cache_clean

 logger = logging.getLogger(__name__)

@@ -75,34 +76,7 @@ class Qwen2VLChatModel(PytorchChatModel):
             self.model_path, device_map=device, trust_remote_code=True
         ).eval()

-
-        self,
-        messages: List[ChatCompletionMessage],
-    ):
-        transformed_messages = []
-        for msg in messages:
-            new_content = []
-            role = msg["role"]
-            content = msg["content"]
-            if isinstance(content, str):
-                new_content.append({"type": "text", "text": content})
-            elif isinstance(content, List):
-                for item in content:  # type: ignore
-                    if "text" in item:
-                        new_content.append({"type": "text", "text": item["text"]})
-                    elif "image_url" in item:
-                        new_content.append(
-                            {"type": "image", "image": item["image_url"]["url"]}
-                        )
-                    elif "video_url" in item:
-                        new_content.append(
-                            {"type": "video", "video": item["video_url"]["url"]}
-                        )
-            new_message = {"role": role, "content": new_content}
-            transformed_messages.append(new_message)
-
-        return transformed_messages
-
+    @cache_clean
     def chat(
         self,
         messages: List[ChatCompletionMessage],  # type: ignore
xinference/model/llm/transformers/qwen_vl.py

@@ -28,7 +28,7 @@ from ....types import ChatCompletion, ChatCompletionChunk, CompletionChunk
 from ..llm_family import LLMFamilyV1, LLMSpecV1
 from ..utils import generate_chat_completion, generate_completion_chunk
 from .core import PytorchChatModel, PytorchGenerateConfig
-from .utils import pad_prefill_tokens
+from .utils import cache_clean, pad_prefill_tokens

 logger = logging.getLogger(__name__)

@@ -137,6 +137,7 @@ class QwenVLChatModel(PytorchChatModel):
         prompt = self._message_content_to_qwen(messages[-1]["content"])
         return prompt, qwen_history

+    @cache_clean
     def chat(
         self,
         messages: List[Dict],
xinference/model/llm/transformers/utils.py

@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import asyncio
+import functools
 import gc
 import logging
 import os

@@ -126,7 +127,8 @@ def generate_stream(
     echo = bool(generate_config.get("echo", False))
     stop_str = generate_config.get("stop", None)
     stop_token_ids = generate_config.get("stop_token_ids", None) or []
-
+    if tokenizer.eos_token_id not in stop_token_ids:
+        stop_token_ids.append(tokenizer.eos_token_id)
     chunk_id = str(uuid.uuid4())

     logits_processor = prepare_logits_processor(

@@ -776,3 +778,34 @@ def batch_inference_one_step(
         for r in req_list:
             r.stopped = True
             r.error_msg = str(e)
+
+
+def cache_clean(fn):
+    @functools.wraps(fn)
+    async def _async_wrapper(self, *args, **kwargs):
+        import gc
+
+        from ....device_utils import empty_cache
+
+        result = await fn(self, *args, **kwargs)
+
+        gc.collect()
+        empty_cache()
+        return result
+
+    @functools.wraps(fn)
+    def _wrapper(self, *args, **kwargs):
+        import gc
+
+        from ....device_utils import empty_cache
+
+        result = fn(self, *args, **kwargs)
+
+        gc.collect()
+        empty_cache()
+        return result
+
+    if asyncio.iscoroutinefunction(fn):
+        return _async_wrapper
+    else:
+        return _wrapper
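The cache_clean decorator defined above is what the new @cache_clean lines in the transformers model files hook into: it wraps sync and async chat methods alike and frees Python garbage plus the accelerator cache after each call. A self-contained sketch of the same pattern; FakeModel is hypothetical, and empty_cache is stubbed here since the real one comes from xinference's device_utils and dispatches to the backend cache release:

import asyncio
import functools
import gc


def empty_cache():
    # Stand-in for the device-specific cache release used by xinference
    # (e.g. torch.cuda.empty_cache() on CUDA devices).
    pass


def cache_clean(fn):
    # Same shape as the decorator in the diff: pick the wrapper that
    # matches whether the wrapped function is a coroutine.
    @functools.wraps(fn)
    async def _async_wrapper(self, *args, **kwargs):
        result = await fn(self, *args, **kwargs)
        gc.collect()
        empty_cache()
        return result

    @functools.wraps(fn)
    def _wrapper(self, *args, **kwargs):
        result = fn(self, *args, **kwargs)
        gc.collect()
        empty_cache()
        return result

    return _async_wrapper if asyncio.iscoroutinefunction(fn) else _wrapper


class FakeModel:  # hypothetical model class, for illustration only
    @cache_clean
    def chat(self, prompt):
        return f"echo: {prompt}"


print(FakeModel().chat("hi"))  # caches are cleared after the call returns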