xinference 0.11.1__py3-none-any.whl → 0.11.2.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of xinference might be problematic. Click here for more details.

Files changed (31):
  1. xinference/_version.py +3 -3
  2. xinference/api/restful_api.py +30 -0
  3. xinference/client/restful/restful_client.py +29 -0
  4. xinference/core/cache_tracker.py +12 -1
  5. xinference/core/supervisor.py +30 -2
  6. xinference/core/utils.py +12 -0
  7. xinference/core/worker.py +4 -1
  8. xinference/deploy/cmdline.py +126 -0
  9. xinference/deploy/test/test_cmdline.py +24 -0
  10. xinference/model/llm/__init__.py +2 -0
  11. xinference/model/llm/llm_family.json +501 -6
  12. xinference/model/llm/llm_family.py +84 -10
  13. xinference/model/llm/llm_family_modelscope.json +198 -7
  14. xinference/model/llm/memory.py +332 -0
  15. xinference/model/llm/pytorch/core.py +2 -0
  16. xinference/model/llm/pytorch/intern_vl.py +347 -0
  17. xinference/model/llm/utils.py +13 -0
  18. xinference/model/llm/vllm/core.py +5 -2
  19. xinference/model/rerank/core.py +23 -1
  20. xinference/model/utils.py +17 -7
  21. xinference/thirdparty/deepseek_vl/models/processing_vlm.py +1 -1
  22. xinference/thirdparty/deepseek_vl/models/siglip_vit.py +2 -2
  23. xinference/thirdparty/llava/mm_utils.py +3 -2
  24. xinference/thirdparty/llava/model/llava_arch.py +1 -1
  25. xinference/thirdparty/omnilmm/chat.py +6 -5
  26. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/METADATA +8 -7
  27. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/RECORD +31 -29
  28. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/LICENSE +0 -0
  29. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/WHEEL +0 -0
  30. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/entry_points.txt +0 -0
  31. {xinference-0.11.1.dist-info → xinference-0.11.2.post1.dist-info}/top_level.txt +0 -0
@@ -2198,6 +2198,31 @@
2198
2198
  ]
2199
2199
  }
2200
2200
  },
2201
+ {
2202
+ "version": 1,
2203
+ "context_length": 65536,
2204
+ "model_name": "codeqwen1.5",
2205
+ "model_lang": [
2206
+ "en",
2207
+ "zh"
2208
+ ],
2209
+ "model_ability": [
2210
+ "generate"
2211
+ ],
2212
+ "model_description": "CodeQwen1.5 is the Code-Specific version of Qwen1.5. It is a transformer-based decoder-only language model pretrained on a large amount of data of codes.",
2213
+ "model_specs": [
2214
+ {
2215
+ "model_format": "pytorch",
2216
+ "model_size_in_billions": 7,
2217
+ "quantizations": [
2218
+ "4-bit",
2219
+ "8-bit",
2220
+ "none"
2221
+ ],
2222
+ "model_id": "Qwen/CodeQwen1.5-7B"
2223
+ }
2224
+ ]
2225
+ },
2201
2226
  {
2202
2227
  "version": 1,
2203
2228
  "context_length": 65536,
@@ -4335,6 +4360,83 @@
4335
4360
  ]
4336
4361
  }
4337
4362
  },
4363
+ {
4364
+ "version": 1,
4365
+ "context_length": 4096,
4366
+ "model_name": "deepseek",
4367
+ "model_lang": [
4368
+ "en",
4369
+ "zh"
4370
+ ],
4371
+ "model_ability": [
4372
+ "generate"
4373
+ ],
4374
+ "model_description": "DeepSeek LLM, trained from scratch on a vast dataset of 2 trillion tokens in both English and Chinese. ",
4375
+ "model_specs": [
4376
+ {
4377
+ "model_format": "pytorch",
4378
+ "model_size_in_billions": 7,
4379
+ "quantizations": [
4380
+ "4-bit",
4381
+ "8-bit",
4382
+ "none"
4383
+ ],
4384
+ "model_id": "deepseek-ai/deepseek-llm-7b-base",
4385
+ "model_revision": "7683fea62db869066ddaff6a41d032262c490d4f"
4386
+ },
4387
+ {
4388
+ "model_format": "pytorch",
4389
+ "model_size_in_billions": 67,
4390
+ "quantizations": [
4391
+ "4-bit",
4392
+ "8-bit",
4393
+ "none"
4394
+ ],
4395
+ "model_id": "deepseek-ai/deepseek-llm-67b-base",
4396
+ "model_revision": "c3f813a1121c95488a20132d3a4da89f4a46452f"
4397
+ },
4398
+ {
4399
+ "model_format": "ggufv2",
4400
+ "model_size_in_billions": 7,
4401
+ "quantizations": [
4402
+ "Q2_K",
4403
+ "Q3_K_L",
4404
+ "Q3_K_M",
4405
+ "Q3_K_S",
4406
+ "Q4_0",
4407
+ "Q4_K_M",
4408
+ "Q4_K_S",
4409
+ "Q5_0",
4410
+ "Q5_K_M",
4411
+ "Q5_K_S",
4412
+ "Q6_K",
4413
+ "Q8_0"
4414
+ ],
4415
+ "model_id": "TheBloke/deepseek-llm-7B-chat-GGUF",
4416
+ "model_file_name_template": "deepseek-llm-7b-chat.{quantization}.gguf"
4417
+ },
4418
+ {
4419
+ "model_format": "ggufv2",
4420
+ "model_size_in_billions": 67,
4421
+ "quantizations": [
4422
+ "Q2_K",
4423
+ "Q3_K_L",
4424
+ "Q3_K_M",
4425
+ "Q3_K_S",
4426
+ "Q4_0",
4427
+ "Q4_K_M",
4428
+ "Q4_K_S",
4429
+ "Q5_0",
4430
+ "Q5_K_M",
4431
+ "Q5_K_S",
4432
+ "Q6_K",
4433
+ "Q8_0"
4434
+ ],
4435
+ "model_id": "TheBloke/deepseek-llm-67b-chat-GGUF",
4436
+ "model_file_name_template": "deepseek-llm-67b-chat.{quantization}.gguf"
4437
+ }
4438
+ ]
4439
+ },
4338
4440
  {
4339
4441
  "version": 1,
4340
4442
  "context_length": 4096,
@@ -4427,7 +4529,199 @@
4427
4529
  },
4428
4530
  {
4429
4531
  "version": 1,
4430
- "context_length": 4096,
4532
+ "context_length": 16384,
4533
+ "model_name": "deepseek-coder",
4534
+ "model_lang": [
4535
+ "en",
4536
+ "zh"
4537
+ ],
4538
+ "model_ability": [
4539
+ "generate"
4540
+ ],
4541
+ "model_description": "Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. ",
4542
+ "model_specs": [
4543
+ {
4544
+ "model_format": "pytorch",
4545
+ "model_size_in_billions": "1_3",
4546
+ "quantizations": [
4547
+ "4-bit",
4548
+ "8-bit",
4549
+ "none"
4550
+ ],
4551
+ "model_id": "deepseek-ai/deepseek-coder-1.3b-base",
4552
+ "model_revision": "c919139c3a9b4070729c8b2cca4847ab29ca8d94"
4553
+ },
4554
+ {
4555
+ "model_format": "pytorch",
4556
+ "model_size_in_billions": "6_7",
4557
+ "quantizations": [
4558
+ "4-bit",
4559
+ "8-bit",
4560
+ "none"
4561
+ ],
4562
+ "model_id": "deepseek-ai/deepseek-coder-6.7b-base",
4563
+ "model_revision": "ce2207a8bfef3ee92bd7dd4cc31c52cfa0046912"
4564
+ },
4565
+ {
4566
+ "model_format": "pytorch",
4567
+ "model_size_in_billions": 7,
4568
+ "quantizations": [
4569
+ "4-bit",
4570
+ "8-bit",
4571
+ "none"
4572
+ ],
4573
+ "model_id": "deepseek-ai/deepseek-coder-7b-base-v1.5",
4574
+ "model_revision": "98f0904cee2237e235f10408ae12292037b21dac"
4575
+ },
4576
+ {
4577
+ "model_format": "pytorch",
4578
+ "model_size_in_billions": 33,
4579
+ "quantizations": [
4580
+ "4-bit",
4581
+ "8-bit",
4582
+ "none"
4583
+ ],
4584
+ "model_id": "deepseek-ai/deepseek-coder-33b-base",
4585
+ "model_revision": "45c85cadf3720ef3e85a492e24fd4b8c5d21d8ac"
4586
+ },
4587
+ {
4588
+ "model_format": "ggufv2",
4589
+ "model_size_in_billions": "1_3",
4590
+ "quantizations": [
4591
+ "Q2_K",
4592
+ "Q3_K_L",
4593
+ "Q3_K_M",
4594
+ "Q3_K_S",
4595
+ "Q4_0",
4596
+ "Q4_K_M",
4597
+ "Q4_K_S",
4598
+ "Q5_0",
4599
+ "Q5_K_M",
4600
+ "Q5_K_S",
4601
+ "Q6_K",
4602
+ "Q8_0"
4603
+ ],
4604
+ "model_id": "TheBloke/deepseek-coder-1.3b-base-GGUF",
4605
+ "model_file_name_template": "deepseek-coder-1.3b-base.{quantization}.gguf"
4606
+ },
4607
+ {
4608
+ "model_format": "ggufv2",
4609
+ "model_size_in_billions": "6_7",
4610
+ "quantizations": [
4611
+ "Q2_K",
4612
+ "Q3_K_L",
4613
+ "Q3_K_M",
4614
+ "Q3_K_S",
4615
+ "Q4_0",
4616
+ "Q4_K_M",
4617
+ "Q4_K_S",
4618
+ "Q5_0",
4619
+ "Q5_K_M",
4620
+ "Q5_K_S",
4621
+ "Q6_K",
4622
+ "Q8_0"
4623
+ ],
4624
+ "model_id": "TheBloke/deepseek-coder-6.7B-base-GGUF",
4625
+ "model_file_name_template": "deepseek-coder-6.7b-base.{quantization}.gguf"
4626
+ },
4627
+ {
4628
+ "model_format": "ggufv2",
4629
+ "model_size_in_billions": 7,
4630
+ "quantizations": [
4631
+ "Q2_K",
4632
+ "Q3_K_L",
4633
+ "Q3_K_M",
4634
+ "Q3_K_S",
4635
+ "Q4_K_M",
4636
+ "Q4_K_S",
4637
+ "Q5_0",
4638
+ "Q5_K_M",
4639
+ "Q5_K_S",
4640
+ "Q6_K",
4641
+ "Q8_0"
4642
+ ],
4643
+ "model_id": "dagbs/deepseek-coder-7b-base-v1.5-GGUF",
4644
+ "model_file_name_template": "deepseek-coder-7b-base-v1.5.{quantization}.gguf"
4645
+ },
4646
+ {
4647
+ "model_format": "ggufv2",
4648
+ "model_size_in_billions": 33,
4649
+ "quantizations": [
4650
+ "Q2_K",
4651
+ "Q3_K_L",
4652
+ "Q3_K_M",
4653
+ "Q3_K_S",
4654
+ "Q4_0",
4655
+ "Q4_K_M",
4656
+ "Q4_K_S",
4657
+ "Q5_0",
4658
+ "Q5_K_M",
4659
+ "Q5_K_S",
4660
+ "Q6_K",
4661
+ "Q8_0"
4662
+ ],
4663
+ "model_id": "TheBloke/deepseek-coder-33B-base-GGUF",
4664
+ "model_file_name_template": "deepseek-coder-33b-base.{quantization}.gguf"
4665
+ },
4666
+ {
4667
+ "model_format": "gptq",
4668
+ "model_size_in_billions": "1_3",
4669
+ "quantizations": [
4670
+ "Int4"
4671
+ ],
4672
+ "model_id": "TheBloke/deepseek-coder-1.3b-base-GPTQ",
4673
+ "model_revision": "a5bf3b76d70cda53327311a631b1003024d5de29"
4674
+ },
4675
+ {
4676
+ "model_format": "gptq",
4677
+ "model_size_in_billions": "6_7",
4678
+ "quantizations": [
4679
+ "Int4"
4680
+ ],
4681
+ "model_id": "TheBloke/deepseek-coder-6.7B-base-GPTQ",
4682
+ "model_revision": "6476ea3d6e623a1313d363dbc6e172773e031bb1"
4683
+ },
4684
+ {
4685
+ "model_format": "gptq",
4686
+ "model_size_in_billions": 33,
4687
+ "quantizations": [
4688
+ "Int4"
4689
+ ],
4690
+ "model_id": "TheBloke/deepseek-coder-33B-base-GPTQ",
4691
+ "model_revision": "f527d7325e463a5cb091d044e4f2b15902674a70"
4692
+ },
4693
+ {
4694
+ "model_format": "awq",
4695
+ "model_size_in_billions": "1_3",
4696
+ "quantizations": [
4697
+ "Int4"
4698
+ ],
4699
+ "model_id": "TheBloke/deepseek-coder-1.3b-base-AWQ",
4700
+ "model_revision": "ffb66f1a2a194401b4f29025edcd261d7f0a08a7"
4701
+ },
4702
+ {
4703
+ "model_format": "awq",
4704
+ "model_size_in_billions": "6_7",
4705
+ "quantizations": [
4706
+ "Int4"
4707
+ ],
4708
+ "model_id": "TheBloke/deepseek-coder-6.7B-base-AWQ",
4709
+ "model_revision": "e3d4bdf39712665f5e9d5c05c9df6f20fe1e2d5a"
4710
+ },
4711
+ {
4712
+ "model_format": "awq",
4713
+ "model_size_in_billions": 33,
4714
+ "quantizations": [
4715
+ "Int4"
4716
+ ],
4717
+ "model_id": "TheBloke/deepseek-coder-33B-base-AWQ",
4718
+ "model_revision": "c7edb2d5868d61a5dcf2591933a8992c8cbe3ef4"
4719
+ }
4720
+ ]
4721
+ },
4722
+ {
4723
+ "version": 1,
4724
+ "context_length": 16384,
4431
4725
  "model_name": "deepseek-coder-instruct",
4432
4726
  "model_lang": [
4433
4727
  "en",
@@ -4460,6 +4754,17 @@
4460
4754
  "model_id": "deepseek-ai/deepseek-coder-6.7b-instruct",
4461
4755
  "model_revision": "cbb77d7448ea3168d884758817e7f895e3828d1c"
4462
4756
  },
4757
+ {
4758
+ "model_format": "pytorch",
4759
+ "model_size_in_billions": 7,
4760
+ "quantizations": [
4761
+ "4-bit",
4762
+ "8-bit",
4763
+ "none"
4764
+ ],
4765
+ "model_id": "deepseek-ai/deepseek-coder-7b-instruct-v1.5",
4766
+ "model_revision": "2a050a4c59d687a85324d32e147517992117ed30"
4767
+ },
4463
4768
  {
4464
4769
  "model_format": "pytorch",
4465
4770
  "model_size_in_billions": 33,
@@ -4511,6 +4816,25 @@
4511
4816
  "model_id": "TheBloke/deepseek-coder-6.7B-instruct-GGUF",
4512
4817
  "model_file_name_template": "deepseek-coder-6.7b-instruct.{quantization}.gguf"
4513
4818
  },
4819
+ {
4820
+ "model_format": "ggufv2",
4821
+ "model_size_in_billions": 7,
4822
+ "quantizations": [
4823
+ "Q3_K_L",
4824
+ "Q3_K_M",
4825
+ "Q3_K_S",
4826
+ "Q4_0",
4827
+ "Q4_K_M",
4828
+ "Q4_K_S",
4829
+ "Q5_0",
4830
+ "Q5_K_M",
4831
+ "Q5_K_S",
4832
+ "Q6_K",
4833
+ "Q8_0"
4834
+ ],
4835
+ "model_id": "LoneStriker/deepseek-coder-7b-instruct-v1.5-GGUF",
4836
+ "model_file_name_template": "deepseek-coder-7b-instruct-v1.5-{quantization}.gguf"
4837
+ },
4514
4838
  {
4515
4839
  "model_format": "ggufv2",
4516
4840
  "model_size_in_billions": 33,
@@ -4530,6 +4854,60 @@
4530
4854
  ],
4531
4855
  "model_id": "TheBloke/deepseek-coder-33B-instruct-GGUF",
4532
4856
  "model_file_name_template": "deepseek-coder-33b-instruct.{quantization}.gguf"
4857
+ },
4858
+ {
4859
+ "model_format": "gptq",
4860
+ "model_size_in_billions": "1_3",
4861
+ "quantizations": [
4862
+ "Int4"
4863
+ ],
4864
+ "model_id": "TheBloke/deepseek-coder-1.3b-instruct-GPTQ",
4865
+ "model_revision": "9c002e9af6cbdf3bd9244e2d7264b6a35d1dcacf"
4866
+ },
4867
+ {
4868
+ "model_format": "gptq",
4869
+ "model_size_in_billions": "6_7",
4870
+ "quantizations": [
4871
+ "Int4"
4872
+ ],
4873
+ "model_id": "TheBloke/deepseek-coder-6.7B-instruct-GPTQ",
4874
+ "model_revision": "13ccea6e3a43dcfdcb655d92097610018b431a17"
4875
+ },
4876
+ {
4877
+ "model_format": "gptq",
4878
+ "model_size_in_billions": 33,
4879
+ "quantizations": [
4880
+ "Int4"
4881
+ ],
4882
+ "model_id": "TheBloke/deepseek-coder-33B-instruct-GPTQ",
4883
+ "model_revision": "08372729d98dfc248f9531a412fe69e14e607027"
4884
+ },
4885
+ {
4886
+ "model_format": "awq",
4887
+ "model_size_in_billions": "1_3",
4888
+ "quantizations": [
4889
+ "Int4"
4890
+ ],
4891
+ "model_id": "TheBloke/deepseek-coder-1.3b-instruct-AWQ",
4892
+ "model_revision": "a2a484da6e4146d055316a9a63cf5b13955715a4"
4893
+ },
4894
+ {
4895
+ "model_format": "awq",
4896
+ "model_size_in_billions": "6_7",
4897
+ "quantizations": [
4898
+ "Int4"
4899
+ ],
4900
+ "model_id": "TheBloke/deepseek-coder-6.7B-instruct-AWQ",
4901
+ "model_revision": "502ae3e19e57ae78dc30a791ba33c565da72dc62"
4902
+ },
4903
+ {
4904
+ "model_format": "awq",
4905
+ "model_size_in_billions": 33,
4906
+ "quantizations": [
4907
+ "Int4"
4908
+ ],
4909
+ "model_id": "TheBloke/deepseek-coder-33B-instruct-AWQ",
4910
+ "model_revision": "c40b499bac2712cd3c445cf1b05d2c6558ab0d29"
4533
4911
  }
4534
4912
  ],
4535
4913
  "prompt_style": {
@@ -5455,9 +5833,9 @@
5455
5833
  "ar"
5456
5834
  ],
5457
5835
  "model_ability": [
5458
- "generate"
5836
+ "chat"
5459
5837
  ],
5460
- "model_description": "C4AI Command-R is a research release of a 35 billion parameter highly performant generative model.",
5838
+ "model_description": "C4AI Command-R(+) is a research release of a 35 and 104 billion parameter highly performant generative model.",
5461
5839
  "model_specs": [
5462
5840
  {
5463
5841
  "model_format": "pytorch",
@@ -5506,7 +5884,21 @@
5506
5884
  "model_id": "alpindale/c4ai-command-r-plus-GPTQ",
5507
5885
  "model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
5508
5886
  }
5509
- ]
5887
+ ],
5888
+ "prompt_style": {
5889
+ "style_name": "c4ai-command-r",
5890
+ "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
5891
+ "roles": [
5892
+ "<|USER_TOKEN|>",
5893
+ "<|CHATBOT_TOKEN|>"
5894
+ ],
5895
+ "intra_message_sep": "",
5896
+ "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
5897
+ "stop_token_ids": [
5898
+ 6,
5899
+ 255001
5900
+ ]
5901
+ }
5510
5902
  },
5511
5903
  {
5512
5904
  "version": 1,
@@ -5547,7 +5939,21 @@
5547
5939
  "model_id": "CohereForAI/c4ai-command-r-plus-4bit",
5548
5940
  "model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
5549
5941
  }
5550
- ]
5942
+ ],
5943
+ "prompt_style": {
5944
+ "style_name": "c4ai-command-r",
5945
+ "system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
5946
+ "roles": [
5947
+ "<|USER_TOKEN|>",
5948
+ "<|CHATBOT_TOKEN|>"
5949
+ ],
5950
+ "intra_message_sep": "",
5951
+ "inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
5952
+ "stop_token_ids": [
5953
+ 6,
5954
+ 255001
5955
+ ]
5956
+ }
5551
5957
  },
5552
5958
  {
5553
5959
  "version": 1,
@@ -5588,5 +5994,94 @@
5588
5994
  32000
5589
5995
  ]
5590
5996
  }
5591
- }
5997
+ },
5998
+ {
5999
+ "version": 1,
6000
+ "context_length": 32768,
6001
+ "model_name": "internvl-chat",
6002
+ "model_lang": [
6003
+ "en",
6004
+ "zh"
6005
+ ],
6006
+ "model_ability": [
6007
+ "chat",
6008
+ "vision"
6009
+ ],
6010
+ "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6011
+ "model_specs": [
6012
+ {
6013
+ "model_format": "pytorch",
6014
+ "model_size_in_billions": 26,
6015
+ "quantizations": [
6016
+ "none"
6017
+ ],
6018
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5",
6019
+ "model_revision": "e822119e5806946ce128043023a73d715ecabf8d"
6020
+ },
6021
+ {
6022
+ "model_format": "pytorch",
6023
+ "model_size_in_billions": 26,
6024
+ "quantizations": [
6025
+ "Int8"
6026
+ ],
6027
+ "model_id": "OpenGVLab/InternVL-Chat-V1-5-{quantization}",
6028
+ "model_revision": "acaaed06937c603ab04f084216ecb0268160f538"
6029
+ }
6030
+ ],
6031
+ "prompt_style": {
6032
+ "style_name": "INTERNLM2",
6033
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6034
+ "roles": [
6035
+ "<|im_start|>user",
6036
+ "<|im_start|>assistant"
6037
+ ],
6038
+ "intra_message_sep": "<|im_end|>",
6039
+ "stop_token_ids": [
6040
+ 92542
6041
+ ],
6042
+ "stop": [
6043
+ "<|im_end|>"
6044
+ ]
6045
+ }
6046
+ },
6047
+ {
6048
+ "version": 1,
6049
+ "context_length": 32768,
6050
+ "model_name": "mini-internvl-chat",
6051
+ "model_lang": [
6052
+ "en",
6053
+ "zh"
6054
+ ],
6055
+ "model_ability": [
6056
+ "chat",
6057
+ "vision"
6058
+ ],
6059
+ "model_description": "InternVL 1.5 is an open-source multimodal large language model (MLLM) to bridge the capability gap between open-source and proprietary commercial models in multimodal understanding. ",
6060
+ "model_specs": [
6061
+ {
6062
+ "model_format": "pytorch",
6063
+ "model_size_in_billions": 2,
6064
+ "quantizations": [
6065
+ "none"
6066
+ ],
6067
+ "model_id": "OpenGVLab/Mini-InternVL-Chat-2B-V1-5",
6068
+ "model_revision": "ce3f67acff17281bacbf4b156f402a0580fb9605"
6069
+ }
6070
+ ],
6071
+ "prompt_style": {
6072
+ "style_name": "INTERNLM2",
6073
+ "system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
6074
+ "roles": [
6075
+ "<|im_start|>user",
6076
+ "<|im_start|>assistant"
6077
+ ],
6078
+ "intra_message_sep": "<|im_end|>",
6079
+ "stop_token_ids": [
6080
+ 92542
6081
+ ],
6082
+ "stop": [
6083
+ "<|im_end|>"
6084
+ ]
6085
+ }
6086
+ }
5592
6087
  ]