xinference 0.16.3__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release: this version of xinference might be problematic.

Files changed (69)
  1. xinference/_compat.py +22 -2
  2. xinference/_version.py +3 -3
  3. xinference/api/restful_api.py +148 -12
  4. xinference/client/restful/restful_client.py +47 -2
  5. xinference/constants.py +1 -0
  6. xinference/core/model.py +45 -15
  7. xinference/core/supervisor.py +8 -2
  8. xinference/core/utils.py +67 -2
  9. xinference/model/audio/__init__.py +12 -0
  10. xinference/model/audio/core.py +21 -4
  11. xinference/model/audio/fish_speech.py +70 -35
  12. xinference/model/audio/model_spec.json +81 -1
  13. xinference/model/audio/whisper_mlx.py +208 -0
  14. xinference/model/embedding/core.py +259 -4
  15. xinference/model/embedding/model_spec.json +1 -1
  16. xinference/model/embedding/model_spec_modelscope.json +1 -1
  17. xinference/model/image/stable_diffusion/core.py +5 -2
  18. xinference/model/llm/__init__.py +2 -0
  19. xinference/model/llm/llm_family.json +485 -6
  20. xinference/model/llm/llm_family_modelscope.json +519 -0
  21. xinference/model/llm/mlx/core.py +45 -3
  22. xinference/model/llm/sglang/core.py +1 -0
  23. xinference/model/llm/transformers/core.py +1 -0
  24. xinference/model/llm/transformers/glm_edge_v.py +230 -0
  25. xinference/model/llm/utils.py +19 -0
  26. xinference/model/llm/vllm/core.py +84 -2
  27. xinference/model/rerank/core.py +11 -4
  28. xinference/thirdparty/fish_speech/fish_speech/conversation.py +254 -0
  29. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
  30. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
  31. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
  32. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
  33. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
  34. xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +76 -11
  35. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
  36. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
  37. xinference/thirdparty/fish_speech/fish_speech/text/clean.py +32 -1
  38. xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
  39. xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
  40. xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
  41. xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
  42. xinference/thirdparty/fish_speech/tools/api.py +578 -75
  43. xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
  44. xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
  45. xinference/thirdparty/fish_speech/tools/llama/generate.py +393 -9
  46. xinference/thirdparty/fish_speech/tools/msgpack_api.py +90 -29
  47. xinference/thirdparty/fish_speech/tools/post_api.py +37 -15
  48. xinference/thirdparty/fish_speech/tools/schema.py +187 -0
  49. xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
  50. xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
  51. xinference/thirdparty/fish_speech/tools/webui.py +138 -75
  52. xinference/types.py +2 -1
  53. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/METADATA +30 -6
  54. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/RECORD +58 -63
  55. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/WHEEL +1 -1
  56. xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
  57. xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
  58. xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
  59. xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
  60. xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
  61. xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
  62. xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
  63. xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
  64. xinference/thirdparty/fish_speech/tools/commons.py +0 -35
  65. xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
  66. xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
  67. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/LICENSE +0 -0
  68. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/entry_points.txt +0 -0
  69. {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/top_level.txt +0 -0
@@ -3411,8 +3411,8 @@
           "8-bit",
           "none"
         ],
-        "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
-        "model_revision": "9552e7b1d9b2d5bbd87a5aa7221817285dbb6366"
+        "model_id": "mistralai/Codestral-22B-v0.1",
+        "model_revision": "8f5fe23af91885222a1563283c87416745a5e212"
       },
       {
         "model_format": "ggufv2",
@@ -8205,6 +8205,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8213,8 +8223,17 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-1.5B",
-        "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B"
       },
       {
         "model_format": "pytorch",
@@ -8224,8 +8243,27 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-7B",
-        "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
+        "model_id": "Qwen/Qwen2.5-Coder-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B"
       }
     ]
   },
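Note the mixed types in model_size_in_billions above: fractional sizes are written as strings with an underscore in place of the decimal point ("0_5", "1_5"), while whole-number sizes appear either as plain integers (14, 32) or as digit-only strings ("3"). A minimal sketch of the normalization this convention implies; the helper name is hypothetical and this is not xinference's actual code:

from typing import Union

def parse_model_size(size: Union[int, str]) -> float:
    """Hypothetical helper: normalize a `model_size_in_billions` value.

    Fractional sizes are stored as strings with '_' standing in for the
    decimal point (e.g. "0_5" -> 0.5, "1_5" -> 1.5); integral sizes may be
    plain ints (14, 32) or digit-only strings ("3", "7").
    """
    if isinstance(size, int):
        return float(size)
    return float(size.replace("_", "."))

assert parse_model_size("0_5") == 0.5
assert parse_model_size("1_5") == 1.5
assert parse_model_size(32) == 32.0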
@@ -8243,6 +8281,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8253,6 +8301,16 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -8263,6 +8321,53 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": "7",
@@ -8272,6 +8377,73 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+      },
+
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
@@ -8344,5 +8516,312 @@
       "<|im_start|>",
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "QwQ-32B-Preview",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/QwQ-32B-Preview"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "KirillR/QwQ-32B-Preview-AWQ"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q3_K_L",
+          "Q4_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "lmstudio-community/QwQ-32B-Preview-GGUF",
+        "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/QwQ-32B-Preview-bf16"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-edge-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-1.5b-chat"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-4b-chat"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-4b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-4b-chat-gguf"
+      }
+    ],
+    "chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+    "stop_token_ids": [
+      59246,
+      59253,
+      59255
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|user|>",
+      "<|observation|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-edge-v",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-v-2b"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-v-5b"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "f16"
+        ],
+        "model_file_name_template": "mmproj-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "f16"
+        ],
+        "model_file_name_template": "mmproj-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      }
+    ],
+    "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+    "stop_token_ids": [
+      59246,
+      59253,
+      59255
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|user|>",
+      "<|observation|>"
+    ]
   }
 ]
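The glm-edge-chat entry's chat_template above is plain Jinja, so it can be sanity-checked outside the server. An illustrative render with jinja2; the template string is copied from the entry, while the rendering call is just a quick check, not how xinference applies templates internally:

# Illustrative check of the glm-edge-chat template added above.
from jinja2 import Template

chat_template = (
    "{% for item in messages %}"
    "{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}"
    "{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}"
    "{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}"
    "{% endif %}{% endfor %}"
    "{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = Template(chat_template).render(
    messages=messages, add_generation_prompt=True
)
print(prompt)
# <|system|>
# You are a helpful assistant.<|user|>
# Hello!<|assistant|>

The rendered prompt ends with <|assistant|> plus a newline, which lines up with the stop strings (<|user|>, <|observation|>, <|endoftext|>) registered alongside the template for halting generation.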