xinference 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +97 -8
- xinference/client/restful/restful_client.py +51 -11
- xinference/core/media_interface.py +758 -0
- xinference/core/model.py +49 -9
- xinference/core/worker.py +31 -37
- xinference/deploy/utils.py +0 -3
- xinference/model/audio/__init__.py +16 -27
- xinference/model/audio/core.py +1 -0
- xinference/model/audio/cosyvoice.py +4 -2
- xinference/model/audio/model_spec.json +20 -3
- xinference/model/audio/model_spec_modelscope.json +18 -1
- xinference/model/embedding/__init__.py +16 -24
- xinference/model/image/__init__.py +15 -25
- xinference/model/llm/__init__.py +37 -110
- xinference/model/llm/core.py +15 -6
- xinference/model/llm/llama_cpp/core.py +25 -353
- xinference/model/llm/llm_family.json +613 -89
- xinference/model/llm/llm_family.py +9 -1
- xinference/model/llm/llm_family_modelscope.json +540 -90
- xinference/model/llm/mlx/core.py +6 -3
- xinference/model/llm/reasoning_parser.py +281 -5
- xinference/model/llm/sglang/core.py +16 -3
- xinference/model/llm/transformers/chatglm.py +2 -2
- xinference/model/llm/transformers/cogagent.py +1 -1
- xinference/model/llm/transformers/cogvlm2.py +1 -1
- xinference/model/llm/transformers/core.py +9 -3
- xinference/model/llm/transformers/glm4v.py +1 -1
- xinference/model/llm/transformers/minicpmv26.py +1 -1
- xinference/model/llm/transformers/qwen-omni.py +6 -0
- xinference/model/llm/transformers/qwen_vl.py +1 -1
- xinference/model/llm/utils.py +68 -45
- xinference/model/llm/vllm/core.py +38 -18
- xinference/model/llm/vllm/xavier/test/test_xavier.py +1 -10
- xinference/model/rerank/__init__.py +13 -24
- xinference/model/video/__init__.py +15 -25
- xinference/model/video/core.py +3 -3
- xinference/model/video/diffusers.py +133 -16
- xinference/model/video/model_spec.json +54 -0
- xinference/model/video/model_spec_modelscope.json +56 -0
- xinference/thirdparty/cosyvoice/bin/average_model.py +5 -4
- xinference/thirdparty/cosyvoice/bin/export_jit.py +50 -20
- xinference/thirdparty/cosyvoice/bin/export_onnx.py +136 -51
- xinference/thirdparty/cosyvoice/bin/inference.py +15 -5
- xinference/thirdparty/cosyvoice/bin/train.py +7 -2
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +72 -52
- xinference/thirdparty/cosyvoice/cli/frontend.py +58 -58
- xinference/thirdparty/cosyvoice/cli/model.py +140 -155
- xinference/thirdparty/cosyvoice/dataset/processor.py +9 -5
- xinference/thirdparty/cosyvoice/flow/decoder.py +656 -54
- xinference/thirdparty/cosyvoice/flow/flow.py +69 -11
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +167 -63
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +1 -0
- xinference/thirdparty/cosyvoice/hifigan/discriminator.py +91 -1
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/generator.py +4 -1
- xinference/thirdparty/cosyvoice/hifigan/hifigan.py +2 -2
- xinference/thirdparty/cosyvoice/llm/llm.py +198 -18
- xinference/thirdparty/cosyvoice/transformer/embedding.py +12 -4
- xinference/thirdparty/cosyvoice/transformer/upsample_encoder.py +124 -21
- xinference/thirdparty/cosyvoice/utils/class_utils.py +13 -0
- xinference/thirdparty/cosyvoice/utils/common.py +1 -1
- xinference/thirdparty/cosyvoice/utils/file_utils.py +40 -2
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +7 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +4 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +5 -1
- xinference/thirdparty/matcha/hifigan/xutils.py +3 -3
- xinference/types.py +0 -71
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/main.ae579a97.js +3 -0
- xinference/web/ui/build/static/js/main.ae579a97.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/0196a4b09e3264614e54360d5f832c46b31d964ec58296765ebff191ace6adbf.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/12e02ee790dbf57ead09a241a93bb5f893393aa36628ca741d44390e836a103f.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/18fa271456b31cded36c05c4c71c6b2b1cf4e4128c1e32f0e45d8b9f21764397.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/2fdc61dcb6a9d1fbcb44be592d0e87d8c3f21297a7327559ef5345665f8343f7.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/3d596a3e8dd6430d7ce81d164e32c31f8d47cfa5f725c328a298754d78563e14.json +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/8472e58a31720892d534f3febda31f746b25ec4aa60787eef34217b074e67965.json +1 -0
- xinference/web/ui/src/locales/en.json +6 -4
- xinference/web/ui/src/locales/zh.json +6 -4
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/METADATA +56 -36
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/RECORD +87 -87
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/WHEEL +1 -1
- xinference/core/image_interface.py +0 -377
- xinference/thirdparty/cosyvoice/bin/export_trt.sh +0 -9
- xinference/web/ui/build/static/js/main.91e77b5c.js +0 -3
- xinference/web/ui/build/static/js/main.91e77b5c.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/0f0adb2283a8f469d097a7a0ebb754624fa52414c83b83696c41f2e6a737ceda.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/5e6edb0fb87e3798f142e9abf8dd2dc46bab33a60d31dff525797c0c99887097.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/6087820be1bd5c02c42dff797e7df365448ef35ab26dd5d6bd33e967e05cbfd4.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/8157db83995c671eb57abc316c337f867d1dc63fb83520bb4ff351fee57dcce2.json +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/f04f666b77b44d7be3e16034d6b0074de2ba9c254f1fae15222b3148608fa8b3.json +0 -1
- /xinference/web/ui/build/static/js/{main.91e77b5c.js.LICENSE.txt → main.ae579a97.js.LICENSE.txt} +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/entry_points.txt +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/licenses/LICENSE +0 -0
- {xinference-1.5.1.dist-info → xinference-1.6.0.dist-info}/top_level.txt +0 -0
xinference/model/llm/llm_family.json

@@ -6246,6 +6246,17 @@
         ],
         "model_id": "mlx-community/Qwen2.5-VL-7B-Instruct-{quantization}"
       },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen2.5-VL-32B-Instruct-{quantization}"
+      },
       {
         "model_format": "mlx",
         "model_size_in_billions": 72,
@@ -6285,6 +6296,14 @@
     ],
     "model_description": "Qwen2.5-Omni: the new flagship end-to-end multimodal model in the Qwen series.",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Omni-3B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -7741,6 +7760,236 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "HuatuoGPT-o1-Qwen2.5",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 72,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-72B"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "HuatuoGPT-o1-LLaMA-3.1",
+    "model_lang": [
+      "en"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning. It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-8B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 70,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "FreedomIntelligence/HuatuoGPT-o1-70B"
+      }
+    ],
+    "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      128001,
+      128008,
+      128009
+    ],
+    "stop": [
+      "<|end_of_text|>",
+      "<|eot_id|>",
+      "<|eom_id|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "DianJin-R1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "Tongyi DianJin is a financial intelligence solution platform built by Alibaba Cloud, dedicated to providing financial business developers with a convenient artificial intelligence application development environment.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "DianJin/DianJin-R1-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "DianJin/DianJin-R1-32B"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "IQ4_XS",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0",
+          "f16"
+        ],
+        "model_id": "mradermacher/DianJin-R1-7B-GGUF",
+        "model_file_name_template": "DianJin-R1-7B.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "i1-IQ1_S",
+          "i1-IQ1_M",
+          "i1-IQ2_XXS",
+          "i1-IQ2_XS",
+          "i1-IQ2_S",
+          "i1-IQ2_M",
+          "i1-Q2_K_S",
+          "i1-Q2_K",
+          "i1-IQ3_XXS",
+          "i1-IQ3_XS",
+          "i1-Q3_K_S",
+          "i1-IQ3_S",
+          "i1-IQ3_M",
+          "i1-Q3_K_M",
+          "i1-Q3_K_L",
+          "i1-IQ4_XS",
+          "i1-IQ4_NL",
+          "i1-Q4_0",
+          "i1-Q4_K_S",
+          "i1-Q4_K_M",
+          "i1-Q4_1",
+          "i1-Q5_K_S",
+          "i1-Q5_K_M",
+          "i1-Q6_K"
+        ],
+        "model_id": "mradermacher/DianJin-R1-7B-i1-GGUF",
+        "model_file_name_template": "DianJin-R1-7B.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q2_K",
+          "Q3_K_S",
+          "Q3_K_M",
+          "Q3_K_L",
+          "IQ4_XS",
+          "Q4_K_S",
+          "Q4_K_M",
+          "Q5_K_S",
+          "Q5_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "mradermacher/DianJin-R1-32B-GGUF",
+        "model_file_name_template": "DianJin-R1-32B.{quantization}.gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "i1-IQ1_S",
+          "i1-IQ1_M",
+          "i1-IQ2_XXS",
+          "i1-IQ2_XS",
+          "i1-IQ2_S",
+          "i1-IQ2_M",
+          "i1-Q2_K_S",
+          "i1-Q2_K",
+          "i1-IQ3_XXS",
+          "i1-IQ3_XS",
+          "i1-Q3_K_S",
+          "i1-IQ3_S",
+          "i1-IQ3_M",
+          "i1-Q3_K_M",
+          "i1-Q3_K_L",
+          "i1-IQ4_XS",
+          "i1-Q4_0",
+          "i1-Q4_K_S",
+          "i1-Q4_K_M",
+          "i1-Q4_1",
+          "i1-Q5_K_S",
+          "i1-Q5_K_M",
+          "i1-Q6_K"
+        ],
+        "model_id": "mradermacher/DianJin-R1-32B-i1-GGUF",
+        "model_file_name_template": "DianJin-R1-32B.{quantization}.gguf"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -8041,6 +8290,49 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "XiYanSQL-QwenCoder-2504",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "tools"
+    ],
+    "model_description": "The XiYanSQL-QwenCoder models, as multi-dialect SQL base models, demonstrating robust SQL generation capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "XGenerationLab/XiYanSQL-QwenCoder-7B-2504"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "XGenerationLab/XiYanSQL-QwenCoder-32B-2504"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
   {
     "version": 1,
     "context_length": 32768,
@@ -8166,37 +8458,40 @@
         "model_format": "ggufv2",
         "model_size_in_billions": 32,
         "quantizations": [
-          "
-          "
-          "
-          "
-          "
-          "
+          "BF16",
+          "IQ4_NL",
+          "IQ4_XS",
+          "Q2_K",
+          "Q2_K_L",
+          "Q3_K_M",
+          "Q3_K_S",
+          "Q4_0",
+          "Q4_1",
+          "Q4_K_M",
+          "Q5_K_M",
+          "Q6_K",
+          "UD-IQ1_M",
+          "UD-IQ1_S",
+          "UD-IQ2_M",
+          "UD-IQ2_XXS",
+          "UD-IQ3_XXS",
+          "UD-Q2_K_XL",
+          "UD-Q3_K_XL",
+          "UD-Q4_K_XL",
+          "UD-Q5_K_XL",
+          "UD-Q6_K_XL",
+          "UD-Q8_K_XL",
+          "Q8_0"
         ],
         "quantization_parts": {
-          "
-            "00001-of-
-            "00002-of-
-            "00003-of-00017",
-            "00004-of-00017",
-            "00005-of-00017",
-            "00006-of-00017",
-            "00007-of-00017",
-            "00008-of-00017",
-            "00009-of-00017",
-            "00010-of-00017",
-            "00011-of-00017",
-            "00012-of-00017",
-            "00013-of-00017",
-            "00014-of-00017",
-            "00015-of-00017",
-            "00016-of-00017",
-            "00017-of-00017"
+          "BF16": [
+            "00001-of-00002",
+            "00002-of-00002"
          ]
        },
-        "model_id": "
-        "model_file_name_template": "
-        "model_file_name_split_template": "
+        "model_id": "unsloth/QwQ-32B-GGUF",
+        "model_file_name_template": "QwQ-32B-{quantization}.gguf",
+        "model_file_name_split_template": "BF16/QwQ-32B-{quantization}-{part}.gguf"
       }
     ],
     "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n",
@@ -9140,6 +9435,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 27,
+        "activated_size_in_billions": "4_5",
         "quantizations": [
           "none"
         ],
@@ -9148,6 +9444,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 16,
+        "activated_size_in_billions": "2_8",
        "quantizations": [
           "none"
         ],
@@ -9156,6 +9453,7 @@
       {
         "model_format": "pytorch",
         "model_size_in_billions": 3,
+        "activated_size_in_billions": 1,
        "quantizations": [
           "none"
         ],
@@ -9271,7 +9569,7 @@
         ],
         "model_id": "mlx-community/GLM-4-32B-0414-{quantization}"
       },
-
+      {
         "model_format": "ggufv2",
         "model_size_in_billions": 9,
         "quantizations": [
@@ -9357,110 +9655,110 @@
     }
   },
   {
-    "version":1,
-    "context_length":32768,
-    "model_name":"Ovis2",
-    "model_lang":[
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "Ovis2",
+    "model_lang": [
      "en",
      "zh"
    ],
-    "model_ability":[
+    "model_ability": [
      "chat",
      "vision"
    ],
-    "model_description":"Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
-    "model_specs":[
+    "model_description": "Ovis (Open VISion) is a novel Multimodal Large Language Model (MLLM) architecture, designed to structurally align visual and textual embeddings.",
+    "model_specs": [
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":1,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 1,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-1B"
+        "model_id": "AIDC-AI/Ovis2-1B"
      },
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 2,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-2B"
+        "model_id": "AIDC-AI/Ovis2-2B"
      },
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":4,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 4,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-4B"
+        "model_id": "AIDC-AI/Ovis2-4B"
      },
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 8,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-8B"
+        "model_id": "AIDC-AI/Ovis2-8B"
      },
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":16,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 16,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-16B"
+        "model_id": "AIDC-AI/Ovis2-16B"
      },
      {
-        "model_format":"pytorch",
-        "model_size_in_billions":34,
-        "quantizations":[
+        "model_format": "pytorch",
+        "model_size_in_billions": 34,
+        "quantizations": [
          "none"
        ],
-        "model_id":"AIDC-AI/Ovis2-34B"
+        "model_id": "AIDC-AI/Ovis2-34B"
      },
      {
-        "model_format":"gptq",
-        "model_size_in_billions":2,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 2,
+        "quantizations": [
          "Int4"
        ],
-        "model_id":"AIDC-AI/Ovis2-2B-GPTQ-{quantization}"
+        "model_id": "AIDC-AI/Ovis2-2B-GPTQ-{quantization}"
      },
      {
-        "model_format":"gptq",
-        "model_size_in_billions":4,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 4,
+        "quantizations": [
          "Int4"
        ],
-        "model_id":"AIDC-AI/Ovis2-4B-GPTQ-{quantization}"
+        "model_id": "AIDC-AI/Ovis2-4B-GPTQ-{quantization}"
      },
      {
-        "model_format":"gptq",
-        "model_size_in_billions":8,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 8,
+        "quantizations": [
          "Int4"
        ],
-        "model_id":"AIDC-AI/Ovis2-8B-GPTQ-{quantization}"
+        "model_id": "AIDC-AI/Ovis2-8B-GPTQ-{quantization}"
      },
      {
-        "model_format":"gptq",
-        "model_size_in_billions":16,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 16,
+        "quantizations": [
          "Int4"
        ],
-        "model_id":"AIDC-AI/Ovis2-16B-GPTQ-{quantization}"
+        "model_id": "AIDC-AI/Ovis2-16B-GPTQ-{quantization}"
      },
      {
-        "model_format":"gptq",
-        "model_size_in_billions":34,
-        "quantizations":[
+        "model_format": "gptq",
+        "model_size_in_billions": 34,
+        "quantizations": [
          "Int4",
          "Int8"
        ],
-        "model_id":"AIDC-AI/Ovis2-34B-GPTQ-{quantization}"
+        "model_id": "AIDC-AI/Ovis2-34B-GPTQ-{quantization}"
      }
    ],
-    "chat_template":
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
     "stop_token_ids": [
       151645,
       151643
@@ -9587,6 +9885,64 @@
       "<|im_end|>"
     ]
   },
+  {
+    "version": 1,
+    "context_length": 131072,
+    "model_name": "skywork-or1",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "We release the final version of Skywork-OR1 (Open Reasoner 1) series of models, including",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Skywork/Skywork-OR1-32B"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int8",
+          "Int4"
+        ],
+        "model_id": "JunHowie/Skywork-OR1-32B-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "Skywork/Skywork-OR1-7B"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int8",
+          "Int4"
+        ],
+        "model_id": "JunHowie/Skywork-OR1-7B-GPTQ-{quantization}"
+      }
+    ],
+    "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+    "stop_token_ids": [
+      151643
+    ],
+    "stop": [
+      "<|end▁of▁sentence|>"
+    ],
+    "reasoning_start_tag": "<think>",
+    "reasoning_end_tag": "</think>"
+  },
   {
     "version": 1,
     "context_length": 40960,
@@ -9598,6 +9954,7 @@
     "model_ability": [
       "chat",
       "reasoning",
+      "hybrid",
       "tools"
     ],
     "model_description": "Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support"
@@ -9622,10 +9979,29 @@
         "model_format": "gptq",
         "model_size_in_billions": "0_6",
         "quantizations": [
-          "Int4",
           "Int8"
         ],
-        "model_id": "
+        "model_id": "Qwen/Qwen3-0.6B-GPTQ-Int8"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "JunHowie/Qwen3-0.6B-GPTQ-Int4"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "0_6",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-0.6B-{quantization}"
       },
       {
         "model_format": "ggufv2",
@@ -9679,10 +10055,29 @@
         "model_format": "gptq",
         "model_size_in_billions": "1_7",
         "quantizations": [
-          "Int4",
           "Int8"
         ],
-        "model_id": "
+        "model_id": "Qwen/Qwen3-1.7B-GPTQ-Int8"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "JunHowie/Qwen3-1.7B-GPTQ-Int4"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": "1_7",
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-1.7B-{quantization}"
      },
      {
        "model_format": "ggufv2",
@@ -9732,6 +10127,14 @@
        ],
        "model_id": "Qwen/Qwen3-4B-FP8"
      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-4B-AWQ"
+      },
      {
        "model_format": "gptq",
        "model_size_in_billions": 4,
@@ -9741,6 +10144,18 @@
        ],
        "model_id": "JunHowie/Qwen3-4B-GPTQ-{quantization}"
      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 4,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-4B-{quantization}"
+      },
      {
        "model_format": "ggufv2",
        "model_size_in_billions": 4,
@@ -9789,6 +10204,14 @@
        ],
        "model_id": "Qwen/Qwen3-8B-FP8"
      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-8B-AWQ"
+      },
      {
        "model_format": "gptq",
        "model_size_in_billions": 8,
@@ -9798,6 +10221,18 @@
        ],
        "model_id": "JunHowie/Qwen3-8B-GPTQ-{quantization}"
      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 8,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-8B-{quantization}"
+      },
      {
        "model_format": "ggufv2",
        "model_size_in_billions": 8,
@@ -9846,6 +10281,14 @@
        ],
        "model_id": "Qwen/Qwen3-14B-FP8"
      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-14B-AWQ"
+      },
      {
        "model_format": "gptq",
        "model_size_in_billions": 14,
@@ -9855,6 +10298,18 @@
        ],
        "model_id": "JunHowie/Qwen3-14B-GPTQ-{quantization}"
      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-14B-{quantization}"
+      },
      {
        "model_format": "ggufv2",
        "model_size_in_billions": 14,
@@ -9910,10 +10365,30 @@
        "model_size_in_billions": 30,
        "activated_size_in_billions": 3,
        "quantizations": [
-          "Int4",
          "Int8"
        ],
-        "model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-
+        "model_id": "JunHowie/Qwen3-30B-A3B-GPTQ-Int8"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-30B-A3B-GPTQ-Int4"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 30,
+        "activated_size_in_billions": 3,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-30B-A3B-{quantization}"
      },
      {
        "model_format": "ggufv2",
@@ -9971,7 +10446,15 @@
        ],
        "model_id": "Qwen/Qwen3-32B-FP8"
      },
-
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-32B-AWQ"
+      },
+      {
        "model_format": "gptq",
        "model_size_in_billions": 32,
        "quantizations": [
@@ -9980,6 +10463,17 @@
        ],
        "model_id": "JunHowie/Qwen3-32B-GPTQ-{quantization}"
      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4bit",
+          "6bit",
+          "8bit",
+          "bf16"
+        ],
+        "model_id": "mlx-community/Qwen3-32B-{quantization}"
+      },
      {
        "model_format": "ggufv2",
        "model_size_in_billions": 32,
@@ -10026,7 +10520,7 @@
        "quantizations": [
          "none"
        ],
-        "model_id": "Qwen/Qwen3-235B"
+        "model_id": "Qwen/Qwen3-235B-A22B"
      },
      {
        "model_format": "fp8",
@@ -10035,7 +10529,36 @@
        "quantizations": [
          "fp8"
        ],
-        "model_id": "Qwen/Qwen3-235B-FP8"
+        "model_id": "Qwen/Qwen3-235B-A22B-FP8"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "Int8"
+        ],
+        "model_id": "QuantTrio/Qwen3-235B-A22B-GPTQ-Int8"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen3-235B-A22B-GPTQ-Int4"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 235,
+        "activated_size_in_billions": 22,
+        "quantizations": [
+          "3bit",
+          "4bit",
+          "8bit"
+        ],
+        "model_id": "mlx-community/Qwen/Qwen3-235B-A22B-{quantization}"
      },
      {
        "model_format": "ggufv2",
@@ -10149,6 +10672,7 @@
    "virtualenv": {
      "packages": [
        "transformers>=4.51.0",
+        "mlx-lm>=0.24.0 ; sys_platform=='darwin'",
        "numpy==1.26.4"
      ]
    }
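For orientation, each new `model_specs` entry above maps onto the parameters a user passes when launching a built-in model through the client in `xinference/client/restful/restful_client.py` (also changed in this release). The following is a minimal sketch only, assuming a local supervisor on the default port; the registered model name ("qwen3"), the engine label ("MLX"), and the exact keyword set of `launch_model`/`chat` in 1.6.0 are assumptions for illustration, not taken from this diff.

from xinference.client import Client

# Assumed local endpoint; adjust to your deployment.
client = Client("http://127.0.0.1:9997")

# model_format / model_size_in_billions / quantization select one concrete
# spec from the "model_specs" list added in this diff (here: an mlx build).
model_uid = client.launch_model(
    model_name="qwen3",              # assumed registered name
    model_engine="MLX",              # assumed engine label
    model_format="mlx",
    model_size_in_billions=8,
    quantization="4bit",
)

model = client.get_model(model_uid)
# Messages-style chat call; signature assumed for this client version.
print(model.chat(messages=[{"role": "user", "content": "hello"}]))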