xinference 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (87)
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +5 -5
  3. xinference/core/model.py +6 -1
  4. xinference/deploy/cmdline.py +3 -1
  5. xinference/deploy/test/test_cmdline.py +56 -0
  6. xinference/isolation.py +24 -0
  7. xinference/model/audio/core.py +5 -0
  8. xinference/model/audio/f5tts.py +195 -0
  9. xinference/model/audio/fish_speech.py +2 -1
  10. xinference/model/audio/model_spec.json +8 -0
  11. xinference/model/audio/model_spec_modelscope.json +9 -0
  12. xinference/model/embedding/core.py +203 -142
  13. xinference/model/embedding/model_spec.json +7 -0
  14. xinference/model/embedding/model_spec_modelscope.json +8 -0
  15. xinference/model/llm/__init__.py +2 -2
  16. xinference/model/llm/llm_family.json +172 -53
  17. xinference/model/llm/llm_family_modelscope.json +118 -20
  18. xinference/model/llm/mlx/core.py +230 -49
  19. xinference/model/llm/sglang/core.py +1 -0
  20. xinference/model/llm/transformers/chatglm.py +9 -5
  21. xinference/model/llm/transformers/utils.py +16 -8
  22. xinference/model/llm/utils.py +4 -1
  23. xinference/model/llm/vllm/core.py +5 -0
  24. xinference/thirdparty/f5_tts/__init__.py +0 -0
  25. xinference/thirdparty/f5_tts/api.py +166 -0
  26. xinference/thirdparty/f5_tts/configs/E2TTS_Base_train.yaml +44 -0
  27. xinference/thirdparty/f5_tts/configs/E2TTS_Small_train.yaml +44 -0
  28. xinference/thirdparty/f5_tts/configs/F5TTS_Base_train.yaml +46 -0
  29. xinference/thirdparty/f5_tts/configs/F5TTS_Small_train.yaml +46 -0
  30. xinference/thirdparty/f5_tts/eval/README.md +49 -0
  31. xinference/thirdparty/f5_tts/eval/ecapa_tdnn.py +330 -0
  32. xinference/thirdparty/f5_tts/eval/eval_infer_batch.py +207 -0
  33. xinference/thirdparty/f5_tts/eval/eval_infer_batch.sh +13 -0
  34. xinference/thirdparty/f5_tts/eval/eval_librispeech_test_clean.py +84 -0
  35. xinference/thirdparty/f5_tts/eval/eval_seedtts_testset.py +84 -0
  36. xinference/thirdparty/f5_tts/eval/utils_eval.py +405 -0
  37. xinference/thirdparty/f5_tts/infer/README.md +191 -0
  38. xinference/thirdparty/f5_tts/infer/SHARED.md +74 -0
  39. xinference/thirdparty/f5_tts/infer/examples/basic/basic.toml +11 -0
  40. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_en.wav +0 -0
  41. xinference/thirdparty/f5_tts/infer/examples/basic/basic_ref_zh.wav +0 -0
  42. xinference/thirdparty/f5_tts/infer/examples/multi/country.flac +0 -0
  43. xinference/thirdparty/f5_tts/infer/examples/multi/main.flac +0 -0
  44. xinference/thirdparty/f5_tts/infer/examples/multi/story.toml +19 -0
  45. xinference/thirdparty/f5_tts/infer/examples/multi/story.txt +1 -0
  46. xinference/thirdparty/f5_tts/infer/examples/multi/town.flac +0 -0
  47. xinference/thirdparty/f5_tts/infer/examples/vocab.txt +2545 -0
  48. xinference/thirdparty/f5_tts/infer/infer_cli.py +226 -0
  49. xinference/thirdparty/f5_tts/infer/infer_gradio.py +851 -0
  50. xinference/thirdparty/f5_tts/infer/speech_edit.py +193 -0
  51. xinference/thirdparty/f5_tts/infer/utils_infer.py +538 -0
  52. xinference/thirdparty/f5_tts/model/__init__.py +10 -0
  53. xinference/thirdparty/f5_tts/model/backbones/README.md +20 -0
  54. xinference/thirdparty/f5_tts/model/backbones/dit.py +163 -0
  55. xinference/thirdparty/f5_tts/model/backbones/mmdit.py +146 -0
  56. xinference/thirdparty/f5_tts/model/backbones/unett.py +219 -0
  57. xinference/thirdparty/f5_tts/model/cfm.py +285 -0
  58. xinference/thirdparty/f5_tts/model/dataset.py +319 -0
  59. xinference/thirdparty/f5_tts/model/modules.py +658 -0
  60. xinference/thirdparty/f5_tts/model/trainer.py +366 -0
  61. xinference/thirdparty/f5_tts/model/utils.py +185 -0
  62. xinference/thirdparty/f5_tts/scripts/count_max_epoch.py +33 -0
  63. xinference/thirdparty/f5_tts/scripts/count_params_gflops.py +39 -0
  64. xinference/thirdparty/f5_tts/socket_server.py +159 -0
  65. xinference/thirdparty/f5_tts/train/README.md +77 -0
  66. xinference/thirdparty/f5_tts/train/datasets/prepare_csv_wavs.py +139 -0
  67. xinference/thirdparty/f5_tts/train/datasets/prepare_emilia.py +230 -0
  68. xinference/thirdparty/f5_tts/train/datasets/prepare_libritts.py +92 -0
  69. xinference/thirdparty/f5_tts/train/datasets/prepare_ljspeech.py +65 -0
  70. xinference/thirdparty/f5_tts/train/datasets/prepare_wenetspeech4tts.py +125 -0
  71. xinference/thirdparty/f5_tts/train/finetune_cli.py +174 -0
  72. xinference/thirdparty/f5_tts/train/finetune_gradio.py +1846 -0
  73. xinference/thirdparty/f5_tts/train/train.py +75 -0
  74. xinference/web/ui/build/asset-manifest.json +3 -3
  75. xinference/web/ui/build/index.html +1 -1
  76. xinference/web/ui/build/static/js/{main.2f269bb3.js → main.4eb4ee80.js} +3 -3
  77. xinference/web/ui/build/static/js/main.4eb4ee80.js.map +1 -0
  78. xinference/web/ui/node_modules/.cache/babel-loader/8c5eeb02f772d02cbe8b89c05428d0dd41a97866f75f7dc1c2164a67f5a1cf98.json +1 -0
  79. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/METADATA +33 -14
  80. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/RECORD +85 -34
  81. xinference/web/ui/build/static/js/main.2f269bb3.js.map +0 -1
  82. xinference/web/ui/node_modules/.cache/babel-loader/bd6ad8159341315a1764c397621a560809f7eb7219ab5174c801fca7e969d943.json +0 -1
  83. /xinference/web/ui/build/static/js/{main.2f269bb3.js.LICENSE.txt → main.4eb4ee80.js.LICENSE.txt} +0 -0
  84. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/LICENSE +0 -0
  85. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/WHEEL +0 -0
  86. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/entry_points.txt +0 -0
  87. {xinference-1.0.1.dist-info → xinference-1.1.0.dist-info}/top_level.txt +0 -0
@@ -205,8 +205,8 @@
205
205
  "8-bit",
206
206
  "none"
207
207
  ],
208
- "model_id": "THUDM/glm-4-9b-chat",
209
- "model_revision": "eb55a443d66541f30869f6caac5ad0d2e95bcbaa"
208
+ "model_id": "THUDM/glm-4-9b-chat-hf",
209
+ "model_revision": "c7f73fd9e0f378c87f3c8f2c25aec6ad705043cd"
210
210
  },
211
211
  {
212
212
  "model_format": "ggufv2",
@@ -269,8 +269,8 @@
269
269
  "8-bit",
270
270
  "none"
271
271
  ],
272
- "model_id": "THUDM/glm-4-9b-chat-1m",
273
- "model_revision": "0aa722c7e0745dd21453427dd44c257dd253304f"
272
+ "model_id": "THUDM/glm-4-9b-chat-1m-hf",
273
+ "model_revision": "0588cb62942f0f0a5545c695e5c1b019d64eabdc"
274
274
  },
275
275
  {
276
276
  "model_format": "ggufv2",
@@ -952,7 +952,7 @@
952
952
  "model_format": "mlx",
953
953
  "model_size_in_billions": 8,
954
954
  "quantizations": [
955
- "4-bit"
955
+ "4bit"
956
956
  ],
957
957
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
958
958
  },
@@ -960,7 +960,7 @@
960
960
  "model_format": "mlx",
961
961
  "model_size_in_billions": 8,
962
962
  "quantizations": [
963
- "8-bit"
963
+ "8bit"
964
964
  ],
965
965
  "model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
966
966
  },
@@ -976,7 +976,7 @@
976
976
  "model_format": "mlx",
977
977
  "model_size_in_billions": 70,
978
978
  "quantizations": [
979
- "4-bit"
979
+ "4bit"
980
980
  ],
981
981
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
982
982
  },
@@ -984,7 +984,7 @@
984
984
  "model_format": "mlx",
985
985
  "model_size_in_billions": 70,
986
986
  "quantizations": [
987
- "8-bit"
987
+ "8bit"
988
988
  ],
989
989
  "model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
990
990
  },
@@ -1229,7 +1229,7 @@
1229
1229
  "model_format": "mlx",
1230
1230
  "model_size_in_billions": 8,
1231
1231
  "quantizations": [
1232
- "4-bit"
1232
+ "4bit"
1233
1233
  ],
1234
1234
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
1235
1235
  },
@@ -1237,7 +1237,7 @@
1237
1237
  "model_format": "mlx",
1238
1238
  "model_size_in_billions": 8,
1239
1239
  "quantizations": [
1240
- "8-bit"
1240
+ "8bit"
1241
1241
  ],
1242
1242
  "model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
1243
1243
  },
@@ -1253,7 +1253,7 @@
1253
1253
  "model_format": "mlx",
1254
1254
  "model_size_in_billions": 70,
1255
1255
  "quantizations": [
1256
- "4-bit"
1256
+ "4bit"
1257
1257
  ],
1258
1258
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
1259
1259
  },
@@ -1261,7 +1261,7 @@
1261
1261
  "model_format": "mlx",
1262
1262
  "model_size_in_billions": 70,
1263
1263
  "quantizations": [
1264
- "8-bit"
1264
+ "8bit"
1265
1265
  ],
1266
1266
  "model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
1267
1267
  },
@@ -1399,6 +1399,98 @@
1399
1399
  }
1400
1400
  ]
1401
1401
  },
1402
+ {
1403
+ "version": 1,
1404
+ "context_length": 131072,
1405
+ "model_name": "llama-3.3-instruct",
1406
+ "model_lang": [
1407
+ "en",
1408
+ "de",
1409
+ "fr",
1410
+ "it",
1411
+ "pt",
1412
+ "hi",
1413
+ "es",
1414
+ "th"
1415
+ ],
1416
+ "model_ability": [
1417
+ "chat",
1418
+ "tools"
1419
+ ],
1420
+ "model_description": "The Llama 3.3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks..",
1421
+ "model_specs": [
1422
+ {
1423
+ "model_format": "pytorch",
1424
+ "model_size_in_billions": 70,
1425
+ "quantizations": [
1426
+ "none"
1427
+ ],
1428
+ "model_id": "meta-llama/Llama-3.3-70B-Instruct"
1429
+ },
1430
+ {
1431
+ "model_format": "gptq",
1432
+ "model_size_in_billions": 70,
1433
+ "quantizations": [
1434
+ "Int4"
1435
+ ],
1436
+ "model_id": "shuyuej/Llama-3.3-70B-Instruct-GPTQ"
1437
+ },
1438
+ {
1439
+ "model_format": "awq",
1440
+ "model_size_in_billions": 70,
1441
+ "quantizations": [
1442
+ "Int4"
1443
+ ],
1444
+ "model_id": "casperhansen/llama-3.3-70b-instruct-awq"
1445
+ },
1446
+ {
1447
+ "model_format": "mlx",
1448
+ "model_size_in_billions": 70,
1449
+ "quantizations": [
1450
+ "3bit",
1451
+ "4bit",
1452
+ "6bit",
1453
+ "8bit",
1454
+ "fp16"
1455
+ ],
1456
+ "model_id": "mlx-community/Llama-3.3-70B-Instruct-{quantization}"
1457
+ },
1458
+ {
1459
+ "model_format": "ggufv2",
1460
+ "model_size_in_billions": 70,
1461
+ "quantizations": [
1462
+ "Q3_K_L",
1463
+ "Q4_K_M",
1464
+ "Q6_K",
1465
+ "Q8_0"
1466
+ ],
1467
+ "quantization_parts": {
1468
+ "Q6_K": [
1469
+ "00001-of-00002",
1470
+ "00002-of-00002"
1471
+ ],
1472
+ "Q8_0": [
1473
+ "00001-of-00002",
1474
+ "00002-of-00002"
1475
+ ]
1476
+ },
1477
+ "model_id": "lmstudio-community/Llama-3.3-70B-Instruct-GGUF",
1478
+ "model_file_name_template": "Llama-3.3-70B-Instruct-{quantization}.gguf",
1479
+ "model_file_name_split_template": "Llama-3.3-70B-Instruct-{quantization}-{part}.gguf"
1480
+ }
1481
+ ],
1482
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
1483
+ "stop_token_ids": [
1484
+ 128001,
1485
+ 128008,
1486
+ 128009
1487
+ ],
1488
+ "stop": [
1489
+ "<|end_of_text|>",
1490
+ "<|eot_id|>",
1491
+ "<|eom_id|>"
1492
+ ]
1493
+ },
1402
1494
  {
1403
1495
  "version": 1,
1404
1496
  "context_length": 2048,
@@ -2199,7 +2291,7 @@
2199
2291
  "model_format": "mlx",
2200
2292
  "model_size_in_billions": "0_5",
2201
2293
  "quantizations": [
2202
- "4-bit"
2294
+ "4bit"
2203
2295
  ],
2204
2296
  "model_id": "Qwen/Qwen2-0.5B-Instruct-MLX"
2205
2297
  },
@@ -2207,7 +2299,7 @@
2207
2299
  "model_format": "mlx",
2208
2300
  "model_size_in_billions": "1_5",
2209
2301
  "quantizations": [
2210
- "4-bit"
2302
+ "4bit"
2211
2303
  ],
2212
2304
  "model_id": "Qwen/Qwen2-1.5B-Instruct-MLX"
2213
2305
  },
@@ -2215,7 +2307,7 @@
2215
2307
  "model_format": "mlx",
2216
2308
  "model_size_in_billions": 7,
2217
2309
  "quantizations": [
2218
- "4-bit"
2310
+ "4bit"
2219
2311
  ],
2220
2312
  "model_id": "Qwen/Qwen2-7B-Instruct-MLX"
2221
2313
  },
@@ -2223,7 +2315,7 @@
2223
2315
  "model_format": "mlx",
2224
2316
  "model_size_in_billions": 72,
2225
2317
  "quantizations": [
2226
- "4-bit"
2318
+ "4bit"
2227
2319
  ],
2228
2320
  "model_id": "mlx-community/Qwen2-72B-Instruct-4bit"
2229
2321
  },
@@ -3222,7 +3314,7 @@
3222
3314
  "model_format": "mlx",
3223
3315
  "model_size_in_billions": 12,
3224
3316
  "quantizations": [
3225
- "4-bit"
3317
+ "4bit"
3226
3318
  ],
3227
3319
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
3228
3320
  },
@@ -3230,7 +3322,7 @@
3230
3322
  "model_format": "mlx",
3231
3323
  "model_size_in_billions": 12,
3232
3324
  "quantizations": [
3233
- "8-bit"
3325
+ "8bit"
3234
3326
  ],
3235
3327
  "model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
3236
3328
  }
@@ -3370,7 +3462,7 @@
3370
3462
  "model_format": "mlx",
3371
3463
  "model_size_in_billions": 123,
3372
3464
  "quantizations": [
3373
- "4-bit"
3465
+ "4bit"
3374
3466
  ],
3375
3467
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
3376
3468
  },
@@ -3378,7 +3470,7 @@
3378
3470
  "model_format": "mlx",
3379
3471
  "model_size_in_billions": 123,
3380
3472
  "quantizations": [
3381
- "8-bit"
3473
+ "8bit"
3382
3474
  ],
3383
3475
  "model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
3384
3476
  }
@@ -3436,7 +3528,7 @@
3436
3528
  "model_format": "mlx",
3437
3529
  "model_size_in_billions": 22,
3438
3530
  "quantizations": [
3439
- "4-bit"
3531
+ "4bit"
3440
3532
  ],
3441
3533
  "model_id": "mlx-community/Codestral-22B-v0.1-4bit",
3442
3534
  "model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
@@ -3445,7 +3537,7 @@
3445
3537
  "model_format": "mlx",
3446
3538
  "model_size_in_billions": 22,
3447
3539
  "quantizations": [
3448
- "8-bit"
3540
+ "8bit"
3449
3541
  ],
3450
3542
  "model_id": "mlx-community/Codestral-22B-v0.1-8bit",
3451
3543
  "model_revision": "0399a53970663950d57010e61a2796af524a1588"
@@ -4170,7 +4262,7 @@
4170
4262
  "model_format": "mlx",
4171
4263
  "model_size_in_billions": 6,
4172
4264
  "quantizations": [
4173
- "4-bit"
4265
+ "4bit"
4174
4266
  ],
4175
4267
  "model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
4176
4268
  "model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
@@ -4179,7 +4271,7 @@
4179
4271
  "model_format": "mlx",
4180
4272
  "model_size_in_billions": 6,
4181
4273
  "quantizations": [
4182
- "8-bit"
4274
+ "8bit"
4183
4275
  ],
4184
4276
  "model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
4185
4277
  "model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
@@ -4188,7 +4280,7 @@
4188
4280
  "model_format": "mlx",
4189
4281
  "model_size_in_billions": 9,
4190
4282
  "quantizations": [
4191
- "4-bit"
4283
+ "4bit"
4192
4284
  ],
4193
4285
  "model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
4194
4286
  "model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
@@ -4197,7 +4289,7 @@
4197
4289
  "model_format": "mlx",
4198
4290
  "model_size_in_billions": 9,
4199
4291
  "quantizations": [
4200
- "8-bit"
4292
+ "8bit"
4201
4293
  ],
4202
4294
  "model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
4203
4295
  "model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
@@ -4206,7 +4298,7 @@
4206
4298
  "model_format": "mlx",
4207
4299
  "model_size_in_billions": 34,
4208
4300
  "quantizations": [
4209
- "4-bit"
4301
+ "4bit"
4210
4302
  ],
4211
4303
  "model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
4212
4304
  "model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
@@ -4215,7 +4307,7 @@
4215
4307
  "model_format": "mlx",
4216
4308
  "model_size_in_billions": 34,
4217
4309
  "quantizations": [
4218
- "8-bit"
4310
+ "8bit"
4219
4311
  ],
4220
4312
  "model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
4221
4313
  "model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
@@ -5266,7 +5358,7 @@
5266
5358
  "model_format": "mlx",
5267
5359
  "model_size_in_billions": 7,
5268
5360
  "quantizations": [
5269
- "4-bit"
5361
+ "4bit"
5270
5362
  ],
5271
5363
  "model_id": "mlx-community/internlm2_5-7b-chat-4bit",
5272
5364
  "model_revision": "d12097a867721978142a6048399f470a3d18beee"
@@ -5275,7 +5367,7 @@
5275
5367
  "model_format": "mlx",
5276
5368
  "model_size_in_billions": 7,
5277
5369
  "quantizations": [
5278
- "8-bit"
5370
+ "8bit"
5279
5371
  ],
5280
5372
  "model_id": "mlx-community/internlm2_5-7b-chat-8bit",
5281
5373
  "model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
@@ -5803,7 +5895,7 @@
5803
5895
  "model_format": "mlx",
5804
5896
  "model_size_in_billions": 2,
5805
5897
  "quantizations": [
5806
- "4-bit"
5898
+ "4bit"
5807
5899
  ],
5808
5900
  "model_id": "mlx-community/gemma-2-2b-it-4bit"
5809
5901
  },
@@ -5811,7 +5903,7 @@
5811
5903
  "model_format": "mlx",
5812
5904
  "model_size_in_billions": 2,
5813
5905
  "quantizations": [
5814
- "8-bit"
5906
+ "8bit"
5815
5907
  ],
5816
5908
  "model_id": "mlx-community/gemma-2-2b-it-8bit"
5817
5909
  },
@@ -5827,7 +5919,7 @@
5827
5919
  "model_format": "mlx",
5828
5920
  "model_size_in_billions": 9,
5829
5921
  "quantizations": [
5830
- "4-bit"
5922
+ "4bit"
5831
5923
  ],
5832
5924
  "model_id": "mlx-community/gemma-2-9b-it-4bit"
5833
5925
  },
@@ -5835,7 +5927,7 @@
5835
5927
  "model_format": "mlx",
5836
5928
  "model_size_in_billions": 9,
5837
5929
  "quantizations": [
5838
- "8-bit"
5930
+ "8bit"
5839
5931
  ],
5840
5932
  "model_id": "mlx-community/gemma-2-9b-it-8bit"
5841
5933
  },
@@ -5851,7 +5943,7 @@
5851
5943
  "model_format": "mlx",
5852
5944
  "model_size_in_billions": 27,
5853
5945
  "quantizations": [
5854
- "4-bit"
5946
+ "4bit"
5855
5947
  ],
5856
5948
  "model_id": "mlx-community/gemma-2-27b-it-4bit"
5857
5949
  },
@@ -5859,7 +5951,7 @@
5859
5951
  "model_format": "mlx",
5860
5952
  "model_size_in_billions": 27,
5861
5953
  "quantizations": [
5862
- "8-bit"
5954
+ "8bit"
5863
5955
  ],
5864
5956
  "model_id": "mlx-community/gemma-2-27b-it-8bit"
5865
5957
  },
@@ -6925,7 +7017,7 @@
6925
7017
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4",
6926
7018
  "model_revision":"800d396518c82960ce6d231adecd07bbc474f0a9"
6927
7019
  },
6928
- {
7020
+ {
6929
7021
  "model_format":"awq",
6930
7022
  "model_size_in_billions":2,
6931
7023
  "quantizations":[
@@ -6934,6 +7026,15 @@
6934
7026
  "model_id":"Qwen/Qwen2-VL-2B-Instruct-AWQ",
6935
7027
  "model_revision":"ea8c5854c0044e28626719292de0d9b1a671f6fc"
6936
7028
  },
7029
+ {
7030
+ "model_format":"mlx",
7031
+ "model_size_in_billions":2,
7032
+ "quantizations":[
7033
+ "4bit",
7034
+ "8bit"
7035
+ ],
7036
+ "model_id":"mlx-community/Qwen2-VL-2B-Instruct-{quantization}"
7037
+ },
6937
7038
  {
6938
7039
  "model_format":"pytorch",
6939
7040
  "model_size_in_billions":7,
@@ -6970,6 +7071,15 @@
6970
7071
  "model_id":"Qwen/Qwen2-VL-7B-Instruct-AWQ",
6971
7072
  "model_revision":"f94216e8b513933bccd567bcd9b7350199f32538"
6972
7073
  },
7074
+ {
7075
+ "model_format":"mlx",
7076
+ "model_size_in_billions":7,
7077
+ "quantizations":[
7078
+ "4bit",
7079
+ "8bit"
7080
+ ],
7081
+ "model_id":"mlx-community/Qwen2-VL-7B-Instruct-{quantization}"
7082
+ },
6973
7083
  {
6974
7084
  "model_format":"pytorch",
6975
7085
  "model_size_in_billions":72,
@@ -6994,6 +7104,15 @@
6994
7104
  "Int8"
6995
7105
  ],
6996
7106
  "model_id":"Qwen/Qwen2-VL-72B-Instruct-GPTQ-{quantization}"
7107
+ },
7108
+ {
7109
+ "model_format":"mlx",
7110
+ "model_size_in_billions":72,
7111
+ "quantizations":[
7112
+ "4bit",
7113
+ "8bit"
7114
+ ],
7115
+ "model_id":"mlx-community/Qwen2-VL-72B-Instruct-{quantization}"
6997
7116
  }
6998
7117
  ],
6999
7118
  "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
@@ -8015,7 +8134,7 @@
8015
8134
  "model_format": "mlx",
8016
8135
  "model_size_in_billions": "0_5",
8017
8136
  "quantizations": [
8018
- "4-bit"
8137
+ "4bit"
8019
8138
  ],
8020
8139
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-4bit"
8021
8140
  },
@@ -8023,7 +8142,7 @@
8023
8142
  "model_format": "mlx",
8024
8143
  "model_size_in_billions": "0_5",
8025
8144
  "quantizations": [
8026
- "8-bit"
8145
+ "8bit"
8027
8146
  ],
8028
8147
  "model_id": "mlx-community/Qwen2.5-0.5B-Instruct-8bit"
8029
8148
  },
@@ -8039,7 +8158,7 @@
8039
8158
  "model_format": "mlx",
8040
8159
  "model_size_in_billions": "1_5",
8041
8160
  "quantizations": [
8042
- "4-bit"
8161
+ "4bit"
8043
8162
  ],
8044
8163
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-4bit"
8045
8164
  },
@@ -8047,7 +8166,7 @@
8047
8166
  "model_format": "mlx",
8048
8167
  "model_size_in_billions": "1_5",
8049
8168
  "quantizations": [
8050
- "8-bit"
8169
+ "8bit"
8051
8170
  ],
8052
8171
  "model_id": "mlx-community/Qwen2.5-1.5B-Instruct-8bit"
8053
8172
  },
@@ -8063,7 +8182,7 @@
8063
8182
  "model_format": "mlx",
8064
8183
  "model_size_in_billions": 3,
8065
8184
  "quantizations": [
8066
- "4-bit"
8185
+ "4bit"
8067
8186
  ],
8068
8187
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-4bit"
8069
8188
  },
@@ -8071,7 +8190,7 @@
8071
8190
  "model_format": "mlx",
8072
8191
  "model_size_in_billions": 3,
8073
8192
  "quantizations": [
8074
- "8-bit"
8193
+ "8bit"
8075
8194
  ],
8076
8195
  "model_id": "mlx-community/Qwen2.5-3B-Instruct-8bit"
8077
8196
  },
@@ -8087,7 +8206,7 @@
8087
8206
  "model_format": "mlx",
8088
8207
  "model_size_in_billions": 7,
8089
8208
  "quantizations": [
8090
- "4-bit"
8209
+ "4bit"
8091
8210
  ],
8092
8211
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-4bit"
8093
8212
  },
@@ -8095,7 +8214,7 @@
8095
8214
  "model_format": "mlx",
8096
8215
  "model_size_in_billions": 7,
8097
8216
  "quantizations": [
8098
- "8-bit"
8217
+ "8bit"
8099
8218
  ],
8100
8219
  "model_id": "mlx-community/Qwen2.5-7B-Instruct-8bit"
8101
8220
  },
@@ -8111,7 +8230,7 @@
8111
8230
  "model_format": "mlx",
8112
8231
  "model_size_in_billions": 14,
8113
8232
  "quantizations": [
8114
- "4-bit"
8233
+ "4bit"
8115
8234
  ],
8116
8235
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-4bit"
8117
8236
  },
@@ -8119,7 +8238,7 @@
8119
8238
  "model_format": "mlx",
8120
8239
  "model_size_in_billions": 14,
8121
8240
  "quantizations": [
8122
- "8-bit"
8241
+ "8bit"
8123
8242
  ],
8124
8243
  "model_id": "mlx-community/Qwen2.5-14B-Instruct-8bit"
8125
8244
  },
@@ -8135,7 +8254,7 @@
8135
8254
  "model_format": "mlx",
8136
8255
  "model_size_in_billions": 32,
8137
8256
  "quantizations": [
8138
- "4-bit"
8257
+ "4bit"
8139
8258
  ],
8140
8259
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-4bit"
8141
8260
  },
@@ -8143,7 +8262,7 @@
8143
8262
  "model_format": "mlx",
8144
8263
  "model_size_in_billions": 32,
8145
8264
  "quantizations": [
8146
- "8-bit"
8265
+ "8bit"
8147
8266
  ],
8148
8267
  "model_id": "mlx-community/Qwen2.5-32B-Instruct-8bit"
8149
8268
  },
@@ -8159,7 +8278,7 @@
8159
8278
  "model_format": "mlx",
8160
8279
  "model_size_in_billions": 72,
8161
8280
  "quantizations": [
8162
- "4-bit"
8281
+ "4bit"
8163
8282
  ],
8164
8283
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-4bit"
8165
8284
  },
@@ -8167,7 +8286,7 @@
8167
8286
  "model_format": "mlx",
8168
8287
  "model_size_in_billions": 72,
8169
8288
  "quantizations": [
8170
- "8-bit"
8289
+ "8bit"
8171
8290
  ],
8172
8291
  "model_id": "mlx-community/Qwen2.5-72B-Instruct-8bit"
8173
8292
  },
@@ -8564,7 +8683,7 @@
8564
8683
  "model_format": "mlx",
8565
8684
  "model_size_in_billions": 32,
8566
8685
  "quantizations": [
8567
- "4-bit"
8686
+ "4bit"
8568
8687
  ],
8569
8688
  "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
8570
8689
  },
@@ -8572,7 +8691,7 @@
8572
8691
  "model_format": "mlx",
8573
8692
  "model_size_in_billions": 32,
8574
8693
  "quantizations": [
8575
- "8-bit"
8694
+ "8bit"
8576
8695
  ],
8577
8696
  "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
8578
8697
  },