xinference 0.16.3__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of xinference has been flagged as potentially problematic.
- xinference/_compat.py +22 -2
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +148 -12
- xinference/client/restful/restful_client.py +47 -2
- xinference/constants.py +1 -0
- xinference/core/model.py +45 -15
- xinference/core/supervisor.py +8 -2
- xinference/core/utils.py +67 -2
- xinference/model/audio/__init__.py +12 -0
- xinference/model/audio/core.py +21 -4
- xinference/model/audio/fish_speech.py +70 -35
- xinference/model/audio/model_spec.json +81 -1
- xinference/model/audio/whisper_mlx.py +208 -0
- xinference/model/embedding/core.py +259 -4
- xinference/model/embedding/model_spec.json +1 -1
- xinference/model/embedding/model_spec_modelscope.json +1 -1
- xinference/model/image/stable_diffusion/core.py +5 -2
- xinference/model/llm/__init__.py +2 -0
- xinference/model/llm/llm_family.json +485 -6
- xinference/model/llm/llm_family_modelscope.json +519 -0
- xinference/model/llm/mlx/core.py +45 -3
- xinference/model/llm/sglang/core.py +1 -0
- xinference/model/llm/transformers/core.py +1 -0
- xinference/model/llm/transformers/glm_edge_v.py +230 -0
- xinference/model/llm/utils.py +19 -0
- xinference/model/llm/vllm/core.py +84 -2
- xinference/model/rerank/core.py +11 -4
- xinference/thirdparty/fish_speech/fish_speech/conversation.py +254 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/en_US.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/es_ES.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ja_JP.json +2 -2
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/ko_KR.json +123 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/zh_CN.json +2 -1
- xinference/thirdparty/fish_speech/fish_speech/models/text2semantic/llama.py +76 -11
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/firefly.py +9 -9
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/fsq.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/text/clean.py +32 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/__init__.py +2 -1
- xinference/thirdparty/fish_speech/fish_speech/utils/utils.py +22 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/launch_utils.py +1 -1
- xinference/thirdparty/fish_speech/fish_speech/webui/manage.py +1 -1
- xinference/thirdparty/fish_speech/tools/api.py +578 -75
- xinference/thirdparty/fish_speech/tools/e2e_webui.py +232 -0
- xinference/thirdparty/fish_speech/tools/fish_e2e.py +298 -0
- xinference/thirdparty/fish_speech/tools/llama/generate.py +393 -9
- xinference/thirdparty/fish_speech/tools/msgpack_api.py +90 -29
- xinference/thirdparty/fish_speech/tools/post_api.py +37 -15
- xinference/thirdparty/fish_speech/tools/schema.py +187 -0
- xinference/thirdparty/fish_speech/tools/vqgan/extract_vq.py +7 -1
- xinference/thirdparty/fish_speech/tools/vqgan/inference.py +2 -3
- xinference/thirdparty/fish_speech/tools/webui.py +138 -75
- xinference/types.py +2 -1
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/METADATA +30 -6
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/RECORD +58 -63
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/WHEEL +1 -1
- xinference/thirdparty/fish_speech/fish_speech/configs/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/configs/lora/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/datasets/protos/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/i18n/locale/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/models/vqgan/modules/__init__.py +0 -0
- xinference/thirdparty/fish_speech/fish_speech/webui/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/commons.py +0 -35
- xinference/thirdparty/fish_speech/tools/llama/__init__.py +0 -0
- xinference/thirdparty/fish_speech/tools/vqgan/__init__.py +0 -0
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/LICENSE +0 -0
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/entry_points.txt +0 -0
- {xinference-0.16.3.dist-info → xinference-1.0.1.dist-info}/top_level.txt +0 -0
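The headline change in this release is the batch of new built-in model registrations in llm_family.json (diff below): the full Qwen2.5-Coder line-up (0.5B/3B/14B/32B across pytorch, GPTQ, and AWQ formats), QwQ-32B-Preview, and the GLM-Edge chat and vision families. As a quick orientation, here is a minimal sketch of launching one of the new entries through xinference's RESTful Python client. The server URL, `model_engine` choice, and the messages-style `chat()` call are assumptions to verify against the 1.0.1 docs; only the model name, format, and size come from the registry entries added in this diff.

```python
# Minimal sketch, assuming an xinference 1.0.1 server running on localhost:9997.
from xinference.client import Client

client = Client("http://localhost:9997")

# Name/format/size come from the new llm_family.json entry below;
# model_engine and quantization are illustrative assumptions.
model_uid = client.launch_model(
    model_name="QwQ-32B-Preview",
    model_engine="transformers",
    model_format="pytorch",
    model_size_in_billions=32,
    quantization="none",
)

model = client.get_model(model_uid)
# xinference 1.x moved chat() to OpenAI-style messages; treat this
# signature as an assumption.
reply = model.chat(messages=[{"role": "user", "content": "Hello"}])
print(reply)
```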
Diff of xinference/model/llm/llm_family.json (+485 -6):

--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -3411,8 +3411,8 @@
           "8-bit",
           "none"
         ],
-        "model_id": "mistralai/
-        "model_revision": "
+        "model_id": "mistralai/Codestral-22B-v0.1",
+        "model_revision": "8f5fe23af91885222a1563283c87416745a5e212"
       },
       {
         "model_format": "ggufv2",
@@ -8205,6 +8205,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8213,8 +8223,17 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
-
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B"
       },
       {
         "model_format": "pytorch",
@@ -8224,8 +8243,27 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-7B"
-
+        "model_id": "Qwen/Qwen2.5-Coder-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B"
       }
     ]
   },
@@ -8243,6 +8281,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8253,6 +8301,16 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -8263,6 +8321,53 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": "7",
@@ -8272,6 +8377,73 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "7",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+      },
+
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
@@ -8344,5 +8516,312 @@
       "<|im_start|>",
       "<|im_end|>"
     ]
+  },
+  {
+    "version": 1,
+    "context_length": 32768,
+    "model_name": "QwQ-32B-Preview",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "QwQ-32B-Preview is an experimental research model developed by the Qwen Team, focused on advancing AI reasoning capabilities.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/QwQ-32B-Preview"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "KirillR/QwQ-32B-Preview-AWQ"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Q3_K_L",
+          "Q4_K_M",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_id": "lmstudio-community/QwQ-32B-Preview-GGUF",
+        "model_file_name_template": "QwQ-32B-Preview-{quantization}.gguf"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit"
+        ],
+        "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-4bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "8-bit"
+        ],
+        "model_id": "mlx-community/Qwen_QwQ-32B-Preview_MLX-8bit"
+      },
+      {
+        "model_format": "mlx",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "none"
+        ],
+        "model_id": "mlx-community/QwQ-32B-Preview-bf16"
+      }
+    ],
+    "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+    "stop_token_ids": [
+      151643,
+      151644,
+      151645
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|im_start|>",
+      "<|im_end|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-edge-chat",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat"
+    ],
+    "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-1.5b-chat"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-4b-chat"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-1.5b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-4b-chat-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "4",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-4b-chat-gguf"
+      }
+    ],
+    "chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+    "stop_token_ids": [
+      59246,
+      59253,
+      59255
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|user|>",
+      "<|observation|>"
+    ]
+  },
+  {
+    "version": 1,
+    "context_length": 8192,
+    "model_name": "glm-edge-v",
+    "model_lang": [
+      "en",
+      "zh"
+    ],
+    "model_ability": [
+      "chat",
+      "vision"
+    ],
+    "model_description": "The GLM-Edge series is our attempt to face the end-side real-life scenarios, which consists of two sizes of large-language dialogue models and multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). Among them, the 1.5B / 2B model is mainly for platforms such as mobile phones and cars, and the 4B / 5B model is mainly for platforms such as PCs.",
+    "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-v-2b"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "THUDM/glm-edge-v-5b"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "2",
+        "quantizations": [
+          "f16"
+        ],
+        "model_file_name_template": "mmproj-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-2b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "Q4_0",
+          "Q4_1",
+          "Q4_K",
+          "Q4_K_M",
+          "Q4_K_S",
+          "Q5_0",
+          "Q5_1",
+          "Q5_K",
+          "Q5_K_M",
+          "Q5_K_S",
+          "Q6_K",
+          "Q8_0"
+        ],
+        "model_file_name_template": "ggml-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "F16"
+        ],
+        "model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      },
+      {
+        "model_format": "ggufv2",
+        "model_size_in_billions": "5",
+        "quantizations": [
+          "f16"
+        ],
+        "model_file_name_template": "mmproj-model-{quantization}.gguf",
+        "model_id": "THUDM/glm-edge-v-5b-gguf"
+      }
+    ],
+    "chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
+    "stop_token_ids": [
+      59246,
+      59253,
+      59255
+    ],
+    "stop": [
+      "<|endoftext|>",
+      "<|user|>",
+      "<|observation|>"
+    ]
   }
 ]