xinference 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of xinference might be problematic. Click here for more details.
- xinference/__init__.py +0 -1
- xinference/_version.py +3 -3
- xinference/api/restful_api.py +99 -5
- xinference/client/restful/restful_client.py +98 -1
- xinference/core/chat_interface.py +2 -2
- xinference/core/model.py +85 -26
- xinference/core/scheduler.py +4 -4
- xinference/model/audio/chattts.py +40 -8
- xinference/model/audio/core.py +5 -2
- xinference/model/audio/cosyvoice.py +136 -0
- xinference/model/audio/model_spec.json +24 -0
- xinference/model/audio/model_spec_modelscope.json +27 -0
- xinference/model/flexible/launchers/__init__.py +1 -0
- xinference/model/flexible/launchers/image_process_launcher.py +70 -0
- xinference/model/image/core.py +3 -0
- xinference/model/image/model_spec.json +21 -0
- xinference/model/image/stable_diffusion/core.py +49 -7
- xinference/model/llm/llm_family.json +1065 -106
- xinference/model/llm/llm_family.py +26 -6
- xinference/model/llm/llm_family_csghub.json +39 -0
- xinference/model/llm/llm_family_modelscope.json +460 -47
- xinference/model/llm/pytorch/chatglm.py +243 -5
- xinference/model/llm/pytorch/cogvlm2.py +1 -1
- xinference/model/llm/sglang/core.py +7 -2
- xinference/model/llm/utils.py +78 -1
- xinference/model/llm/vllm/core.py +11 -0
- xinference/thirdparty/cosyvoice/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/bin/inference.py +114 -0
- xinference/thirdparty/cosyvoice/bin/train.py +136 -0
- xinference/thirdparty/cosyvoice/cli/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/cli/cosyvoice.py +83 -0
- xinference/thirdparty/cosyvoice/cli/frontend.py +168 -0
- xinference/thirdparty/cosyvoice/cli/model.py +60 -0
- xinference/thirdparty/cosyvoice/dataset/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/dataset/dataset.py +160 -0
- xinference/thirdparty/cosyvoice/dataset/processor.py +369 -0
- xinference/thirdparty/cosyvoice/flow/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/flow/decoder.py +222 -0
- xinference/thirdparty/cosyvoice/flow/flow.py +135 -0
- xinference/thirdparty/cosyvoice/flow/flow_matching.py +138 -0
- xinference/thirdparty/cosyvoice/flow/length_regulator.py +49 -0
- xinference/thirdparty/cosyvoice/hifigan/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/hifigan/f0_predictor.py +55 -0
- xinference/thirdparty/cosyvoice/hifigan/generator.py +391 -0
- xinference/thirdparty/cosyvoice/llm/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/llm/llm.py +206 -0
- xinference/thirdparty/cosyvoice/transformer/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/transformer/activation.py +84 -0
- xinference/thirdparty/cosyvoice/transformer/attention.py +326 -0
- xinference/thirdparty/cosyvoice/transformer/convolution.py +145 -0
- xinference/thirdparty/cosyvoice/transformer/decoder.py +396 -0
- xinference/thirdparty/cosyvoice/transformer/decoder_layer.py +132 -0
- xinference/thirdparty/cosyvoice/transformer/embedding.py +293 -0
- xinference/thirdparty/cosyvoice/transformer/encoder.py +472 -0
- xinference/thirdparty/cosyvoice/transformer/encoder_layer.py +236 -0
- xinference/thirdparty/cosyvoice/transformer/label_smoothing_loss.py +96 -0
- xinference/thirdparty/cosyvoice/transformer/positionwise_feed_forward.py +115 -0
- xinference/thirdparty/cosyvoice/transformer/subsampling.py +383 -0
- xinference/thirdparty/cosyvoice/utils/__init__.py +0 -0
- xinference/thirdparty/cosyvoice/utils/class_utils.py +70 -0
- xinference/thirdparty/cosyvoice/utils/common.py +103 -0
- xinference/thirdparty/cosyvoice/utils/executor.py +110 -0
- xinference/thirdparty/cosyvoice/utils/file_utils.py +41 -0
- xinference/thirdparty/cosyvoice/utils/frontend_utils.py +125 -0
- xinference/thirdparty/cosyvoice/utils/mask.py +227 -0
- xinference/thirdparty/cosyvoice/utils/scheduler.py +739 -0
- xinference/thirdparty/cosyvoice/utils/train_utils.py +289 -0
- xinference/web/ui/build/asset-manifest.json +3 -3
- xinference/web/ui/build/index.html +1 -1
- xinference/web/ui/build/static/js/{main.95c1d652.js → main.2ef0cfaf.js} +3 -3
- xinference/web/ui/build/static/js/main.2ef0cfaf.js.map +1 -0
- xinference/web/ui/node_modules/.cache/babel-loader/b6807ecc0c231fea699533518a0eb2a2bf68a081ce00d452be40600dbffa17a7.json +1 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/METADATA +18 -8
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/RECORD +80 -36
- xinference/web/ui/build/static/js/main.95c1d652.js.map +0 -1
- xinference/web/ui/node_modules/.cache/babel-loader/709711edada3f1596b309d571285fd31f1c364d66f4425bc28723d0088cc351a.json +0 -1
- /xinference/web/ui/build/static/js/{main.95c1d652.js.LICENSE.txt → main.2ef0cfaf.js.LICENSE.txt} +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/LICENSE +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/WHEEL +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/entry_points.txt +0 -0
- {xinference-0.13.1.dist-info → xinference-0.13.3.dist-info}/top_level.txt +0 -0
|
@@ -819,7 +819,7 @@
|
|
|
819
819
|
"none"
|
|
820
820
|
],
|
|
821
821
|
"model_id": "THUDM/glm-4-9b-chat",
|
|
822
|
-
"model_revision": "
|
|
822
|
+
"model_revision": "76f3474a854145aa4a9ed2612fee9bc8d4a8966b"
|
|
823
823
|
},
|
|
824
824
|
{
|
|
825
825
|
"model_format": "ggufv2",
|
|
@@ -983,6 +983,65 @@
|
|
|
983
983
|
]
|
|
984
984
|
}
|
|
985
985
|
},
|
|
986
|
+
{
|
|
987
|
+
"version": 1,
|
|
988
|
+
"context_length": 131072,
|
|
989
|
+
"model_name": "codegeex4",
|
|
990
|
+
"model_lang": [
|
|
991
|
+
"en",
|
|
992
|
+
"zh"
|
|
993
|
+
],
|
|
994
|
+
"model_ability": [
|
|
995
|
+
"chat"
|
|
996
|
+
],
|
|
997
|
+
"model_description": "the open-source version of the latest CodeGeeX4 model series",
|
|
998
|
+
"model_specs": [
|
|
999
|
+
{
|
|
1000
|
+
"model_format": "pytorch",
|
|
1001
|
+
"model_size_in_billions": 9,
|
|
1002
|
+
"quantizations": [
|
|
1003
|
+
"4-bit",
|
|
1004
|
+
"8-bit",
|
|
1005
|
+
"none"
|
|
1006
|
+
],
|
|
1007
|
+
"model_id": "THUDM/codegeex4-all-9b",
|
|
1008
|
+
"model_revision": "8c4ec1d2f2888412640825a7aa23355939a8f4c6"
|
|
1009
|
+
},
|
|
1010
|
+
{
|
|
1011
|
+
"model_format": "ggufv2",
|
|
1012
|
+
"model_size_in_billions": 9,
|
|
1013
|
+
"quantizations": [
|
|
1014
|
+
"IQ2_M",
|
|
1015
|
+
"IQ3_M",
|
|
1016
|
+
"Q4_K_M",
|
|
1017
|
+
"Q5_K_M",
|
|
1018
|
+
"Q6_K_L",
|
|
1019
|
+
"Q8_0"
|
|
1020
|
+
],
|
|
1021
|
+
"model_file_name_template": "codegeex4-all-9b-{quantization}.gguf",
|
|
1022
|
+
"model_id": "THUDM/codegeex4-all-9b-GGUF",
|
|
1023
|
+
"model_revision": "6a04071c54c943949826d4815ee00717ed8cf153"
|
|
1024
|
+
}
|
|
1025
|
+
],
|
|
1026
|
+
"prompt_style": {
|
|
1027
|
+
"style_name": "CHATGLM3",
|
|
1028
|
+
"system_prompt": "",
|
|
1029
|
+
"roles": [
|
|
1030
|
+
"user",
|
|
1031
|
+
"assistant"
|
|
1032
|
+
],
|
|
1033
|
+
"stop_token_ids": [
|
|
1034
|
+
151329,
|
|
1035
|
+
151336,
|
|
1036
|
+
151338
|
|
1037
|
+
],
|
|
1038
|
+
"stop": [
|
|
1039
|
+
"<|endoftext|>",
|
|
1040
|
+
"<|user|>",
|
|
1041
|
+
"<|observation|>"
|
|
1042
|
+
]
|
|
1043
|
+
}
|
|
1044
|
+
},
|
|
986
1045
|
{
|
|
987
1046
|
"version": 1,
|
|
988
1047
|
"context_length": 2048,
|
|
@@ -1593,6 +1652,329 @@
|
|
|
1593
1652
|
"none"
|
|
1594
1653
|
],
|
|
1595
1654
|
"model_id": "meta-llama/Meta-Llama-3-70B-Instruct"
|
|
1655
|
+
},
|
|
1656
|
+
{
|
|
1657
|
+
"model_format": "mlx",
|
|
1658
|
+
"model_size_in_billions": 8,
|
|
1659
|
+
"quantizations": [
|
|
1660
|
+
"4-bit"
|
|
1661
|
+
],
|
|
1662
|
+
"model_id": "mlx-community/Meta-Llama-3-8B-Instruct-4bit"
|
|
1663
|
+
},
|
|
1664
|
+
{
|
|
1665
|
+
"model_format": "mlx",
|
|
1666
|
+
"model_size_in_billions": 8,
|
|
1667
|
+
"quantizations": [
|
|
1668
|
+
"8-bit"
|
|
1669
|
+
],
|
|
1670
|
+
"model_id": "mlx-community/Meta-Llama-3-8B-Instruct-8bit"
|
|
1671
|
+
},
|
|
1672
|
+
{
|
|
1673
|
+
"model_format": "mlx",
|
|
1674
|
+
"model_size_in_billions": 8,
|
|
1675
|
+
"quantizations": [
|
|
1676
|
+
"none"
|
|
1677
|
+
],
|
|
1678
|
+
"model_id": "mlx-community/Meta-Llama-3-8B-Instruct"
|
|
1679
|
+
},
|
|
1680
|
+
{
|
|
1681
|
+
"model_format": "mlx",
|
|
1682
|
+
"model_size_in_billions": 70,
|
|
1683
|
+
"quantizations": [
|
|
1684
|
+
"4-bit"
|
|
1685
|
+
],
|
|
1686
|
+
"model_id": "mlx-community/Meta-Llama-3-70B-Instruct-4bit-mlx"
|
|
1687
|
+
},
|
|
1688
|
+
{
|
|
1689
|
+
"model_format": "mlx",
|
|
1690
|
+
"model_size_in_billions": 70,
|
|
1691
|
+
"quantizations": [
|
|
1692
|
+
"8-bit"
|
|
1693
|
+
],
|
|
1694
|
+
"model_id": "mlx-community/Meta-Llama-3-70B-Instruct-8bit"
|
|
1695
|
+
},
|
|
1696
|
+
{
|
|
1697
|
+
"model_format": "mlx",
|
|
1698
|
+
"model_size_in_billions": 70,
|
|
1699
|
+
"quantizations": [
|
|
1700
|
+
"none"
|
|
1701
|
+
],
|
|
1702
|
+
"model_id": "mlx-community/Meta-Llama-3-70B-Instruct-mlx-unquantized"
|
|
1703
|
+
},
|
|
1704
|
+
{
|
|
1705
|
+
"model_format": "gptq",
|
|
1706
|
+
"model_size_in_billions": 8,
|
|
1707
|
+
"quantizations": [
|
|
1708
|
+
"Int4"
|
|
1709
|
+
],
|
|
1710
|
+
"model_id": "TechxGenus/Meta-Llama-3-8B-Instruct-GPTQ"
|
|
1711
|
+
},
|
|
1712
|
+
{
|
|
1713
|
+
"model_format": "gptq",
|
|
1714
|
+
"model_size_in_billions": 70,
|
|
1715
|
+
"quantizations": [
|
|
1716
|
+
"Int4"
|
|
1717
|
+
],
|
|
1718
|
+
"model_id": "TechxGenus/Meta-Llama-3-70B-Instruct-GPTQ"
|
|
1719
|
+
}
|
|
1720
|
+
],
|
|
1721
|
+
"prompt_style": {
|
|
1722
|
+
"style_name": "LLAMA3",
|
|
1723
|
+
"system_prompt": "You are a helpful assistant.",
|
|
1724
|
+
"roles": [
|
|
1725
|
+
"user",
|
|
1726
|
+
"assistant"
|
|
1727
|
+
],
|
|
1728
|
+
"intra_message_sep": "\n\n",
|
|
1729
|
+
"inter_message_sep": "<|eot_id|>",
|
|
1730
|
+
"stop_token_ids": [
|
|
1731
|
+
128001,
|
|
1732
|
+
128009
|
|
1733
|
+
],
|
|
1734
|
+
"stop": [
|
|
1735
|
+
"<|end_of_text|>",
|
|
1736
|
+
"<|eot_id|>"
|
|
1737
|
+
]
|
|
1738
|
+
}
|
|
1739
|
+
},
|
|
1740
|
+
{
|
|
1741
|
+
"version": 1,
|
|
1742
|
+
"context_length": 131072,
|
|
1743
|
+
"model_name": "llama-3.1",
|
|
1744
|
+
"model_lang": [
|
|
1745
|
+
"en",
|
|
1746
|
+
"de",
|
|
1747
|
+
"fr",
|
|
1748
|
+
"it",
|
|
1749
|
+
"pt",
|
|
1750
|
+
"hi",
|
|
1751
|
+
"es",
|
|
1752
|
+
"th"
|
|
1753
|
+
],
|
|
1754
|
+
"model_ability": [
|
|
1755
|
+
"generate"
|
|
1756
|
+
],
|
|
1757
|
+
"model_description": "Llama 3.1 is an auto-regressive language model that uses an optimized transformer architecture",
|
|
1758
|
+
"model_specs": [
|
|
1759
|
+
{
|
|
1760
|
+
"model_format": "pytorch",
|
|
1761
|
+
"model_size_in_billions": 8,
|
|
1762
|
+
"quantizations": [
|
|
1763
|
+
"4-bit",
|
|
1764
|
+
"8-bit",
|
|
1765
|
+
"none"
|
|
1766
|
+
],
|
|
1767
|
+
"model_id": "meta-llama/Meta-Llama-3.1-8B"
|
|
1768
|
+
},
|
|
1769
|
+
{
|
|
1770
|
+
"model_format": "ggufv2",
|
|
1771
|
+
"model_size_in_billions": 8,
|
|
1772
|
+
"quantizations": [
|
|
1773
|
+
"Q2_K",
|
|
1774
|
+
"Q3_K_L",
|
|
1775
|
+
"Q3_K_M",
|
|
1776
|
+
"Q3_K_S",
|
|
1777
|
+
"Q4_0",
|
|
1778
|
+
"Q4_1",
|
|
1779
|
+
"Q4_K_M",
|
|
1780
|
+
"Q4_K_S",
|
|
1781
|
+
"Q5_0",
|
|
1782
|
+
"Q5_1",
|
|
1783
|
+
"Q5_K_M",
|
|
1784
|
+
"Q5_K_S",
|
|
1785
|
+
"Q6_K",
|
|
1786
|
+
"Q8_0"
|
|
1787
|
+
],
|
|
1788
|
+
"model_id": "QuantFactory/Meta-Llama-3.1-8B-GGUF",
|
|
1789
|
+
"model_file_name_template": "Meta-Llama-3.1-8B.{quantization}.gguf"
|
|
1790
|
+
},
|
|
1791
|
+
{
|
|
1792
|
+
"model_format": "pytorch",
|
|
1793
|
+
"model_size_in_billions": 70,
|
|
1794
|
+
"quantizations": [
|
|
1795
|
+
"4-bit",
|
|
1796
|
+
"8-bit",
|
|
1797
|
+
"none"
|
|
1798
|
+
],
|
|
1799
|
+
"model_id": "meta-llama/Meta-Llama-3.1-70B"
|
|
1800
|
+
}
|
|
1801
|
+
]
|
|
1802
|
+
},
|
|
1803
|
+
{
|
|
1804
|
+
"version": 1,
|
|
1805
|
+
"context_length": 131072,
|
|
1806
|
+
"model_name": "llama-3.1-instruct",
|
|
1807
|
+
"model_lang": [
|
|
1808
|
+
"en",
|
|
1809
|
+
"de",
|
|
1810
|
+
"fr",
|
|
1811
|
+
"it",
|
|
1812
|
+
"pt",
|
|
1813
|
+
"hi",
|
|
1814
|
+
"es",
|
|
1815
|
+
"th"
|
|
1816
|
+
],
|
|
1817
|
+
"model_ability": [
|
|
1818
|
+
"chat"
|
|
1819
|
+
],
|
|
1820
|
+
"model_description": "The Llama 3.1 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks.",
|
|
1821
|
+
"model_specs": [
|
|
1822
|
+
{
|
|
1823
|
+
"model_format": "ggufv2",
|
|
1824
|
+
"model_size_in_billions": 8,
|
|
1825
|
+
"quantizations": [
|
|
1826
|
+
"Q3_K_L",
|
|
1827
|
+
"IQ4_XS",
|
|
1828
|
+
"Q4_K_M",
|
|
1829
|
+
"Q5_K_M",
|
|
1830
|
+
"Q6_K",
|
|
1831
|
+
"Q8_0"
|
|
1832
|
+
],
|
|
1833
|
+
"model_id": "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF",
|
|
1834
|
+
"model_file_name_template": "Meta-Llama-3.1-8B-Instruct-{quantization}.gguf"
|
|
1835
|
+
},
|
|
1836
|
+
{
|
|
1837
|
+
"model_format": "pytorch",
|
|
1838
|
+
"model_size_in_billions": 8,
|
|
1839
|
+
"quantizations": [
|
|
1840
|
+
"none"
|
|
1841
|
+
],
|
|
1842
|
+
"model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
|
1843
|
+
},
|
|
1844
|
+
{
|
|
1845
|
+
"model_format": "pytorch",
|
|
1846
|
+
"model_size_in_billions": 8,
|
|
1847
|
+
"quantizations": [
|
|
1848
|
+
"4-bit"
|
|
1849
|
+
],
|
|
1850
|
+
"model_id": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
|
|
1851
|
+
},
|
|
1852
|
+
{
|
|
1853
|
+
"model_format": "gptq",
|
|
1854
|
+
"model_size_in_billions": 8,
|
|
1855
|
+
"quantizations": [
|
|
1856
|
+
"Int4"
|
|
1857
|
+
],
|
|
1858
|
+
"model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
|
1859
|
+
},
|
|
1860
|
+
{
|
|
1861
|
+
"model_format": "awq",
|
|
1862
|
+
"model_size_in_billions": 8,
|
|
1863
|
+
"quantizations": [
|
|
1864
|
+
"Int4"
|
|
1865
|
+
],
|
|
1866
|
+
"model_id": "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4"
|
|
1867
|
+
},
|
|
1868
|
+
{
|
|
1869
|
+
"model_format": "ggufv2",
|
|
1870
|
+
"model_size_in_billions": 70,
|
|
1871
|
+
"quantizations": [
|
|
1872
|
+
"IQ2_M",
|
|
1873
|
+
"IQ4_XS",
|
|
1874
|
+
"Q2_K",
|
|
1875
|
+
"Q3_K_S",
|
|
1876
|
+
"Q4_K_M",
|
|
1877
|
+
"Q5_K_M",
|
|
1878
|
+
"Q6_K",
|
|
1879
|
+
"Q8_0"
|
|
1880
|
+
],
|
|
1881
|
+
"quantization_parts": {
|
|
1882
|
+
"Q5_K_M": [
|
|
1883
|
+
"00001-of-00002",
|
|
1884
|
+
"00002-of-00002"
|
|
1885
|
+
],
|
|
1886
|
+
"Q6_K": [
|
|
1887
|
+
"00001-of-00002",
|
|
1888
|
+
"00002-of-00002"
|
|
1889
|
+
],
|
|
1890
|
+
"Q8_0": [
|
|
1891
|
+
"00001-of-00002",
|
|
1892
|
+
"00002-of-00002"
|
|
1893
|
+
]
|
|
1894
|
+
},
|
|
1895
|
+
"model_id": "lmstudio-community/Meta-Llama-3.1-70B-Instruct-GGUF",
|
|
1896
|
+
"model_file_name_template": "Meta-Llama-3.1-70B-Instruct-{quantization}.gguf",
|
|
1897
|
+
"model_file_name_split_template": "Meta-Llama-3.1-70B-Instruct-{quantization}-{part}.gguf"
|
|
1898
|
+
},
|
|
1899
|
+
{
|
|
1900
|
+
"model_format": "pytorch",
|
|
1901
|
+
"model_size_in_billions": 70,
|
|
1902
|
+
"quantizations": [
|
|
1903
|
+
"none"
|
|
1904
|
+
],
|
|
1905
|
+
"model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
|
1906
|
+
},
|
|
1907
|
+
{
|
|
1908
|
+
"model_format": "pytorch",
|
|
1909
|
+
"model_size_in_billions": 70,
|
|
1910
|
+
"quantizations": [
|
|
1911
|
+
"4-bit"
|
|
1912
|
+
],
|
|
1913
|
+
"model_id": "unsloth/Meta-Llama-3.1-70B-Instruct-bnb-4bit"
|
|
1914
|
+
},
|
|
1915
|
+
{
|
|
1916
|
+
"model_format": "gptq",
|
|
1917
|
+
"model_size_in_billions": 70,
|
|
1918
|
+
"quantizations": [
|
|
1919
|
+
"Int4"
|
|
1920
|
+
],
|
|
1921
|
+
"model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-GPTQ-INT4"
|
|
1922
|
+
},
|
|
1923
|
+
{
|
|
1924
|
+
"model_format": "awq",
|
|
1925
|
+
"model_size_in_billions": 70,
|
|
1926
|
+
"quantizations": [
|
|
1927
|
+
"Int4"
|
|
1928
|
+
],
|
|
1929
|
+
"model_id": "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
|
|
1930
|
+
},
|
|
1931
|
+
{
|
|
1932
|
+
"model_format": "mlx",
|
|
1933
|
+
"model_size_in_billions": 8,
|
|
1934
|
+
"quantizations": [
|
|
1935
|
+
"4-bit"
|
|
1936
|
+
],
|
|
1937
|
+
"model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-4bit"
|
|
1938
|
+
},
|
|
1939
|
+
{
|
|
1940
|
+
"model_format": "mlx",
|
|
1941
|
+
"model_size_in_billions": 8,
|
|
1942
|
+
"quantizations": [
|
|
1943
|
+
"8-bit"
|
|
1944
|
+
],
|
|
1945
|
+
"model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct-8bit"
|
|
1946
|
+
},
|
|
1947
|
+
{
|
|
1948
|
+
"model_format": "mlx",
|
|
1949
|
+
"model_size_in_billions": 8,
|
|
1950
|
+
"quantizations": [
|
|
1951
|
+
"none"
|
|
1952
|
+
],
|
|
1953
|
+
"model_id": "mlx-community/Meta-Llama-3.1-8B-Instruct"
|
|
1954
|
+
},
|
|
1955
|
+
{
|
|
1956
|
+
"model_format": "mlx",
|
|
1957
|
+
"model_size_in_billions": 70,
|
|
1958
|
+
"quantizations": [
|
|
1959
|
+
"4-bit"
|
|
1960
|
+
],
|
|
1961
|
+
"model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-4bit"
|
|
1962
|
+
},
|
|
1963
|
+
{
|
|
1964
|
+
"model_format": "mlx",
|
|
1965
|
+
"model_size_in_billions": 70,
|
|
1966
|
+
"quantizations": [
|
|
1967
|
+
"8-bit"
|
|
1968
|
+
],
|
|
1969
|
+
"model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-8bit"
|
|
1970
|
+
},
|
|
1971
|
+
{
|
|
1972
|
+
"model_format": "mlx",
|
|
1973
|
+
"model_size_in_billions": 70,
|
|
1974
|
+
"quantizations": [
|
|
1975
|
+
"none"
|
|
1976
|
+
],
|
|
1977
|
+
"model_id": "mlx-community/Meta-Llama-3.1-70B-Instruct-bf16"
|
|
1596
1978
|
}
|
|
1597
1979
|
],
|
|
1598
1980
|
"prompt_style": {
|
|
@@ -3732,19 +4114,219 @@
|
|
|
3732
4114
|
"Q6_K",
|
|
3733
4115
|
"Q8_0"
|
|
3734
4116
|
],
|
|
3735
|
-
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
|
|
3736
|
-
"model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
|
|
4117
|
+
"model_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
|
|
4118
|
+
"model_file_name_template": "mistral-7b-instruct-v0.2.{quantization}.gguf"
|
|
4119
|
+
}
|
|
4120
|
+
],
|
|
4121
|
+
"prompt_style": {
|
|
4122
|
+
"style_name": "LLAMA2",
|
|
4123
|
+
"system_prompt": "[INST] ",
|
|
4124
|
+
"roles": [
|
|
4125
|
+
"[INST]",
|
|
4126
|
+
"[/INST]"
|
|
4127
|
+
],
|
|
4128
|
+
"intra_message_sep": " ",
|
|
4129
|
+
"inter_message_sep": "<s>",
|
|
4130
|
+
"stop_token_ids": [
|
|
4131
|
+
2
|
|
4132
|
+
],
|
|
4133
|
+
"stop": [
|
|
4134
|
+
"</s>"
|
|
4135
|
+
]
|
|
4136
|
+
}
|
|
4137
|
+
},
|
|
4138
|
+
{
|
|
4139
|
+
"version": 1,
|
|
4140
|
+
"context_length": 32768,
|
|
4141
|
+
"model_name": "mistral-instruct-v0.3",
|
|
4142
|
+
"model_lang": [
|
|
4143
|
+
"en"
|
|
4144
|
+
],
|
|
4145
|
+
"model_ability": [
|
|
4146
|
+
"chat"
|
|
4147
|
+
],
|
|
4148
|
+
"model_description": "The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-7B-v0.3.",
|
|
4149
|
+
"model_specs": [
|
|
4150
|
+
{
|
|
4151
|
+
"model_format": "pytorch",
|
|
4152
|
+
"model_size_in_billions": 7,
|
|
4153
|
+
"quantizations": [
|
|
4154
|
+
"4-bit",
|
|
4155
|
+
"8-bit",
|
|
4156
|
+
"none"
|
|
4157
|
+
],
|
|
4158
|
+
"model_id": "mistralai/Mistral-7B-Instruct-v0.3",
|
|
4159
|
+
"model_revision": "83e9aa141f2e28c82232fea5325f54edf17c43de"
|
|
4160
|
+
},
|
|
4161
|
+
{
|
|
4162
|
+
"model_format": "gptq",
|
|
4163
|
+
"model_size_in_billions": 7,
|
|
4164
|
+
"quantizations": [
|
|
4165
|
+
"Int4"
|
|
4166
|
+
],
|
|
4167
|
+
"model_id": "neuralmagic/Mistral-7B-Instruct-v0.3-GPTQ-4bit"
|
|
4168
|
+
},
|
|
4169
|
+
{
|
|
4170
|
+
"model_format": "awq",
|
|
4171
|
+
"model_size_in_billions": 7,
|
|
4172
|
+
"quantizations": [
|
|
4173
|
+
"Int4"
|
|
4174
|
+
],
|
|
4175
|
+
"model_id": "solidrust/Mistral-7B-Instruct-v0.3-AWQ"
|
|
4176
|
+
},
|
|
4177
|
+
{
|
|
4178
|
+
"model_format": "ggufv2",
|
|
4179
|
+
"model_size_in_billions": 7,
|
|
4180
|
+
"quantizations": [
|
|
4181
|
+
"Q2_K",
|
|
4182
|
+
"Q3_K_S",
|
|
4183
|
+
"Q3_K_M",
|
|
4184
|
+
"Q3_K_L",
|
|
4185
|
+
"Q4_K_S",
|
|
4186
|
+
"Q4_K_M",
|
|
4187
|
+
"Q5_K_S",
|
|
4188
|
+
"Q5_K_M",
|
|
4189
|
+
"Q6_K",
|
|
4190
|
+
"Q8_0",
|
|
4191
|
+
"fp16"
|
|
4192
|
+
],
|
|
4193
|
+
"model_id": "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF",
|
|
4194
|
+
"model_file_name_template": "Mistral-7B-Instruct-v0.3.{quantization}.gguf"
|
|
4195
|
+
}
|
|
4196
|
+
],
|
|
4197
|
+
"prompt_style": {
|
|
4198
|
+
"style_name": "LLAMA2",
|
|
4199
|
+
"system_prompt": "[INST] ",
|
|
4200
|
+
"roles": [
|
|
4201
|
+
"[INST]",
|
|
4202
|
+
"[/INST]"
|
|
4203
|
+
],
|
|
4204
|
+
"intra_message_sep": " ",
|
|
4205
|
+
"inter_message_sep": "<s>",
|
|
4206
|
+
"stop_token_ids": [
|
|
4207
|
+
2
|
|
4208
|
+
],
|
|
4209
|
+
"stop": [
|
|
4210
|
+
"</s>"
|
|
4211
|
+
]
|
|
4212
|
+
}
|
|
4213
|
+
},
|
|
4214
|
+
{
|
|
4215
|
+
"version": 1,
|
|
4216
|
+
"context_length": 1024000,
|
|
4217
|
+
"model_name": "mistral-nemo-instruct",
|
|
4218
|
+
"model_lang": [
|
|
4219
|
+
"en",
|
|
4220
|
+
"fr",
|
|
4221
|
+
"de",
|
|
4222
|
+
"es",
|
|
4223
|
+
"it",
|
|
4224
|
+
"pt",
|
|
4225
|
+
"zh",
|
|
4226
|
+
"ru",
|
|
4227
|
+
"ja"
|
|
4228
|
+
],
|
|
4229
|
+
"model_ability": [
|
|
4230
|
+
"chat"
|
|
4231
|
+
],
|
|
4232
|
+
"model_description": "The Mistral-Nemo-Instruct-2407 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-Nemo-Base-2407",
|
|
4233
|
+
"model_specs": [
|
|
4234
|
+
{
|
|
4235
|
+
"model_format": "pytorch",
|
|
4236
|
+
"model_size_in_billions": 12,
|
|
4237
|
+
"quantizations": [
|
|
4238
|
+
"none"
|
|
4239
|
+
],
|
|
4240
|
+
"model_id": "mistralai/Mistral-Nemo-Instruct-2407",
|
|
4241
|
+
"model_revision": "05b1e4f3e189ec1b5189fb3c973d4cf3369c27af"
|
|
4242
|
+
},
|
|
4243
|
+
{
|
|
4244
|
+
"model_format": "pytorch",
|
|
4245
|
+
"model_size_in_billions": 12,
|
|
4246
|
+
"quantizations": [
|
|
4247
|
+
"4-bit"
|
|
4248
|
+
],
|
|
4249
|
+
"model_id": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
|
|
4250
|
+
"model_revision": "1d85adc9e0fff0b8e4479a037bd75fe1346333ca"
|
|
4251
|
+
},
|
|
4252
|
+
{
|
|
4253
|
+
"model_format": "pytorch",
|
|
4254
|
+
"model_size_in_billions": 12,
|
|
4255
|
+
"quantizations": [
|
|
4256
|
+
"8-bit"
|
|
4257
|
+
],
|
|
4258
|
+
"model_id": "afrizalha/Mistral-Nemo-Instruct-2407-bnb-8bit",
|
|
4259
|
+
"model_revision": "1d2dacf18a486c745219317d1507441406bc7e25"
|
|
4260
|
+
},
|
|
4261
|
+
{
|
|
4262
|
+
"model_format": "gptq",
|
|
4263
|
+
"model_size_in_billions": 12,
|
|
4264
|
+
"quantizations": [
|
|
4265
|
+
"Int4"
|
|
4266
|
+
],
|
|
4267
|
+
"model_id": "ModelCloud/Mistral-Nemo-Instruct-2407-gptq-4bit"
|
|
4268
|
+
},
|
|
4269
|
+
{
|
|
4270
|
+
"model_format": "awq",
|
|
4271
|
+
"model_size_in_billions": 12,
|
|
4272
|
+
"quantizations": [
|
|
4273
|
+
"Int4"
|
|
4274
|
+
],
|
|
4275
|
+
"model_id": "casperhansen/mistral-nemo-instruct-2407-awq"
|
|
4276
|
+
},
|
|
4277
|
+
{
|
|
4278
|
+
"model_format": "ggufv2",
|
|
4279
|
+
"model_size_in_billions": 12,
|
|
4280
|
+
"quantizations": [
|
|
4281
|
+
"Q2_K",
|
|
4282
|
+
"Q3_K_S",
|
|
4283
|
+
"Q3_K_M",
|
|
4284
|
+
"Q3_K_L",
|
|
4285
|
+
"Q4_K_S",
|
|
4286
|
+
"Q4_K_M",
|
|
4287
|
+
"Q5_K_S",
|
|
4288
|
+
"Q5_K_M",
|
|
4289
|
+
"Q6_K",
|
|
4290
|
+
"Q8_0",
|
|
4291
|
+
"fp16"
|
|
4292
|
+
],
|
|
4293
|
+
"model_id": "MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
|
|
4294
|
+
"model_file_name_template": "Mistral-Nemo-Instruct-2407.{quantization}.gguf"
|
|
4295
|
+
},
|
|
4296
|
+
{
|
|
4297
|
+
"model_format": "mlx",
|
|
4298
|
+
"model_size_in_billions": 12,
|
|
4299
|
+
"quantizations": [
|
|
4300
|
+
"none"
|
|
4301
|
+
],
|
|
4302
|
+
"model_id": "mlx-community/Mistral-Nemo-Instruct-2407-bf16"
|
|
4303
|
+
},
|
|
4304
|
+
{
|
|
4305
|
+
"model_format": "mlx",
|
|
4306
|
+
"model_size_in_billions": 12,
|
|
4307
|
+
"quantizations": [
|
|
4308
|
+
"4-bit"
|
|
4309
|
+
],
|
|
4310
|
+
"model_id": "mlx-community/Mistral-Nemo-Instruct-2407-4bit"
|
|
4311
|
+
},
|
|
4312
|
+
{
|
|
4313
|
+
"model_format": "mlx",
|
|
4314
|
+
"model_size_in_billions": 12,
|
|
4315
|
+
"quantizations": [
|
|
4316
|
+
"8-bit"
|
|
4317
|
+
],
|
|
4318
|
+
"model_id": "mlx-community/Mistral-Nemo-Instruct-2407-8bit"
|
|
3737
4319
|
}
|
|
3738
4320
|
],
|
|
3739
4321
|
"prompt_style": {
|
|
3740
|
-
"style_name": "
|
|
3741
|
-
"system_prompt": "
|
|
4322
|
+
"style_name": "mistral-nemo",
|
|
4323
|
+
"system_prompt": "",
|
|
3742
4324
|
"roles": [
|
|
3743
4325
|
"[INST]",
|
|
3744
4326
|
"[/INST]"
|
|
3745
4327
|
],
|
|
3746
|
-
"intra_message_sep": "
|
|
3747
|
-
"inter_message_sep": "
|
|
4328
|
+
"intra_message_sep": "",
|
|
4329
|
+
"inter_message_sep": "</s>",
|
|
3748
4330
|
"stop_token_ids": [
|
|
3749
4331
|
2
|
|
3750
4332
|
],
|
|
@@ -3755,72 +4337,153 @@
|
|
|
3755
4337
|
},
|
|
3756
4338
|
{
|
|
3757
4339
|
"version": 1,
|
|
3758
|
-
"context_length":
|
|
3759
|
-
"model_name": "mistral-instruct
|
|
4340
|
+
"context_length": 131072,
|
|
4341
|
+
"model_name": "mistral-large-instruct",
|
|
3760
4342
|
"model_lang": [
|
|
3761
|
-
"en"
|
|
4343
|
+
"en",
|
|
4344
|
+
"fr",
|
|
4345
|
+
"de",
|
|
4346
|
+
"es",
|
|
4347
|
+
"it",
|
|
4348
|
+
"pt",
|
|
4349
|
+
"zh",
|
|
4350
|
+
"ru",
|
|
4351
|
+
"ja",
|
|
4352
|
+
"ko"
|
|
3762
4353
|
],
|
|
3763
4354
|
"model_ability": [
|
|
3764
4355
|
"chat"
|
|
3765
4356
|
],
|
|
3766
|
-
"model_description": "
|
|
4357
|
+
"model_description": "Mistral-Large-Instruct-2407 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities.",
|
|
3767
4358
|
"model_specs": [
|
|
3768
4359
|
{
|
|
3769
4360
|
"model_format": "pytorch",
|
|
3770
|
-
"model_size_in_billions":
|
|
4361
|
+
"model_size_in_billions": 123,
|
|
3771
4362
|
"quantizations": [
|
|
3772
|
-
"4-bit",
|
|
3773
|
-
"8-bit",
|
|
3774
4363
|
"none"
|
|
3775
4364
|
],
|
|
3776
|
-
"model_id": "mistralai/Mistral-
|
|
3777
|
-
|
|
4365
|
+
"model_id": "mistralai/Mistral-Large-Instruct-2407"
|
|
4366
|
+
},
|
|
4367
|
+
{
|
|
4368
|
+
"model_format": "pytorch",
|
|
4369
|
+
"model_size_in_billions": 123,
|
|
4370
|
+
"quantizations": [
|
|
4371
|
+
"4-bit"
|
|
4372
|
+
],
|
|
4373
|
+
"model_id": "unsloth/Mistral-Large-Instruct-2407-bnb-4bit"
|
|
3778
4374
|
},
|
|
3779
4375
|
{
|
|
3780
4376
|
"model_format": "gptq",
|
|
3781
|
-
"model_size_in_billions":
|
|
4377
|
+
"model_size_in_billions": 123,
|
|
3782
4378
|
"quantizations": [
|
|
3783
4379
|
"Int4"
|
|
3784
4380
|
],
|
|
3785
|
-
"model_id": "
|
|
4381
|
+
"model_id": "ModelCloud/Mistral-Large-Instruct-2407-gptq-4bit"
|
|
3786
4382
|
},
|
|
3787
4383
|
{
|
|
3788
4384
|
"model_format": "awq",
|
|
3789
|
-
"model_size_in_billions":
|
|
4385
|
+
"model_size_in_billions": 123,
|
|
3790
4386
|
"quantizations": [
|
|
3791
4387
|
"Int4"
|
|
3792
4388
|
],
|
|
3793
|
-
"model_id": "
|
|
4389
|
+
"model_id": "TechxGenus/Mistral-Large-Instruct-2407-AWQ"
|
|
3794
4390
|
},
|
|
3795
4391
|
{
|
|
3796
4392
|
"model_format": "ggufv2",
|
|
3797
|
-
"model_size_in_billions":
|
|
4393
|
+
"model_size_in_billions": 123,
|
|
3798
4394
|
"quantizations": [
|
|
3799
4395
|
"Q2_K",
|
|
3800
4396
|
"Q3_K_S",
|
|
3801
4397
|
"Q3_K_M",
|
|
3802
4398
|
"Q3_K_L",
|
|
3803
4399
|
"Q4_K_S",
|
|
3804
|
-
"Q4_K_M"
|
|
3805
|
-
"Q5_K_S",
|
|
3806
|
-
"Q5_K_M",
|
|
3807
|
-
"Q6_K",
|
|
3808
|
-
"Q8_0",
|
|
3809
|
-
"fp16"
|
|
4400
|
+
"Q4_K_M"
|
|
3810
4401
|
],
|
|
3811
|
-
"model_id": "MaziyarPanahi/Mistral-
|
|
3812
|
-
"model_file_name_template": "Mistral-
|
|
4402
|
+
"model_id": "MaziyarPanahi/Mistral-Large-Instruct-2407-GGUF",
|
|
4403
|
+
"model_file_name_template": "Mistral-Large-Instruct-2407.{quantization}.gguf",
|
|
4404
|
+
"model_file_name_split_template": "Mistral-Large-Instruct-2407.{quantization}-{part}.gguf",
|
|
4405
|
+
"quantization_parts": {
|
|
4406
|
+
"Q3_K_L": [
|
|
4407
|
+
"00001-of-00007",
|
|
4408
|
+
"00002-of-00007",
|
|
4409
|
+
"00003-of-00007",
|
|
4410
|
+
"00004-of-00007",
|
|
4411
|
+
"00005-of-00007",
|
|
4412
|
+
"00006-of-00007",
|
|
4413
|
+
"00007-of-00007"
|
|
4414
|
+
],
|
|
4415
|
+
"Q3_K_M": [
|
|
4416
|
+
"00001-of-00007",
|
|
4417
|
+
"00002-of-00007",
|
|
4418
|
+
"00003-of-00007",
|
|
4419
|
+
"00004-of-00007",
|
|
4420
|
+
"00005-of-00007",
|
|
4421
|
+
"00006-of-00007",
|
|
4422
|
+
"00007-of-00007"
|
|
4423
|
+
],
|
|
4424
|
+
"Q3_K_S": [
|
|
4425
|
+
"00001-of-00007",
|
|
4426
|
+
"00002-of-00007",
|
|
4427
|
+
"00003-of-00007",
|
|
4428
|
+
"00004-of-00007",
|
|
4429
|
+
"00005-of-00007",
|
|
4430
|
+
"00006-of-00007",
|
|
4431
|
+
"00007-of-00007"
|
|
4432
|
+
],
|
|
4433
|
+
"Q4_K_M": [
|
|
4434
|
+
"00001-of-00007",
|
|
4435
|
+
"00002-of-00007",
|
|
4436
|
+
"00003-of-00007",
|
|
4437
|
+
"00004-of-00007",
|
|
4438
|
+
"00005-of-00007",
|
|
4439
|
+
"00006-of-00007",
|
|
4440
|
+
"00007-of-00007"
|
|
4441
|
+
],
|
|
4442
|
+
"Q4_K_S": [
|
|
4443
|
+
"00001-of-00007",
|
|
4444
|
+
"00002-of-00007",
|
|
4445
|
+
"00003-of-00007",
|
|
4446
|
+
"00004-of-00007",
|
|
4447
|
+
"00005-of-00007",
|
|
4448
|
+
"00006-of-00007",
|
|
4449
|
+
"00007-of-00007"
|
|
4450
|
+
]
|
|
4451
|
+
}
|
|
4452
|
+
},
|
|
4453
|
+
{
|
|
4454
|
+
"model_format": "mlx",
|
|
4455
|
+
"model_size_in_billions": 123,
|
|
4456
|
+
"quantizations": [
|
|
4457
|
+
"none"
|
|
4458
|
+
],
|
|
4459
|
+
"model_id": "mlx-community/Mistral-Large-Instruct-2407-bf16"
|
|
4460
|
+
},
|
|
4461
|
+
{
|
|
4462
|
+
"model_format": "mlx",
|
|
4463
|
+
"model_size_in_billions": 123,
|
|
4464
|
+
"quantizations": [
|
|
4465
|
+
"4-bit"
|
|
4466
|
+
],
|
|
4467
|
+
"model_id": "mlx-community/Mistral-Large-Instruct-2407-4bit"
|
|
4468
|
+
},
|
|
4469
|
+
{
|
|
4470
|
+
"model_format": "mlx",
|
|
4471
|
+
"model_size_in_billions": 123,
|
|
4472
|
+
"quantizations": [
|
|
4473
|
+
"8-bit"
|
|
4474
|
+
],
|
|
4475
|
+
"model_id": "mlx-community/Mistral-Large-Instruct-2407-8bit"
|
|
3813
4476
|
}
|
|
3814
4477
|
],
|
|
3815
4478
|
"prompt_style": {
|
|
3816
|
-
"style_name": "
|
|
3817
|
-
"system_prompt": "
|
|
4479
|
+
"style_name": "mistral-nemo",
|
|
4480
|
+
"system_prompt": "",
|
|
3818
4481
|
"roles": [
|
|
3819
4482
|
"[INST]",
|
|
3820
4483
|
"[/INST]"
|
|
3821
4484
|
],
|
|
3822
|
-
"intra_message_sep": "
|
|
3823
|
-
"inter_message_sep": "
|
|
4485
|
+
"intra_message_sep": "",
|
|
4486
|
+
"inter_message_sep": "</s>",
|
|
3824
4487
|
"stop_token_ids": [
|
|
3825
4488
|
2
|
|
3826
4489
|
],
|
|
@@ -3869,6 +4532,24 @@
|
|
|
3869
4532
|
],
|
|
3870
4533
|
"model_id": "bartowski/Codestral-22B-v0.1-GGUF",
|
|
3871
4534
|
"model_file_name_template": "Codestral-22B-v0.1-{quantization}.gguf"
|
|
4535
|
+
},
|
|
4536
|
+
{
|
|
4537
|
+
"model_format": "mlx",
|
|
4538
|
+
"model_size_in_billions": 22,
|
|
4539
|
+
"quantizations": [
|
|
4540
|
+
"4-bit"
|
|
4541
|
+
],
|
|
4542
|
+
"model_id": "mlx-community/Codestral-22B-v0.1-4bit",
|
|
4543
|
+
"model_revision": "544626b38eb1c9524f0fa570ec7b29550c26b78d"
|
|
4544
|
+
},
|
|
4545
|
+
{
|
|
4546
|
+
"model_format": "mlx",
|
|
4547
|
+
"model_size_in_billions": 22,
|
|
4548
|
+
"quantizations": [
|
|
4549
|
+
"8-bit"
|
|
4550
|
+
],
|
|
4551
|
+
"model_id": "mlx-community/Codestral-22B-v0.1-8bit",
|
|
4552
|
+
"model_revision": "0399a53970663950d57010e61a2796af524a1588"
|
|
3872
4553
|
}
|
|
3873
4554
|
]
|
|
3874
4555
|
},
|
|
@@ -4609,6 +5290,61 @@
|
|
|
4609
5290
|
"model_id": "modelscope/Yi-1.5-34B-Chat-AWQ",
|
|
4610
5291
|
"model_revision": "26234fea6ac49d456f32f8017289021fb1087a04"
|
|
4611
5292
|
}
|
|
5293
|
+
,
|
|
5294
|
+
{
|
|
5295
|
+
"model_format": "mlx",
|
|
5296
|
+
"model_size_in_billions": 6,
|
|
5297
|
+
"quantizations": [
|
|
5298
|
+
"4-bit"
|
|
5299
|
+
],
|
|
5300
|
+
"model_id": "mlx-community/Yi-1.5-6B-Chat-4bit",
|
|
5301
|
+
"model_revision": "0177c9a12b869d6bc73f772b5a1981a7c966adb6"
|
|
5302
|
+
},
|
|
5303
|
+
{
|
|
5304
|
+
"model_format": "mlx",
|
|
5305
|
+
"model_size_in_billions": 6,
|
|
5306
|
+
"quantizations": [
|
|
5307
|
+
"8-bit"
|
|
5308
|
+
],
|
|
5309
|
+
"model_id": "mlx-community/Yi-1.5-6B-Chat-8bit",
|
|
5310
|
+
"model_revision": "7756e65d1bf1e2e6e97aef6bc9484307225f536b"
|
|
5311
|
+
},
|
|
5312
|
+
{
|
|
5313
|
+
"model_format": "mlx",
|
|
5314
|
+
"model_size_in_billions": 9,
|
|
5315
|
+
"quantizations": [
|
|
5316
|
+
"4-bit"
|
|
5317
|
+
],
|
|
5318
|
+
"model_id": "mlx-community/Yi-1.5-9B-Chat-4bit",
|
|
5319
|
+
"model_revision": "e15f886479c44e7d90f0ac13ace69b2319b71c2f"
|
|
5320
|
+
},
|
|
5321
|
+
{
|
|
5322
|
+
"model_format": "mlx",
|
|
5323
|
+
"model_size_in_billions": 9,
|
|
5324
|
+
"quantizations": [
|
|
5325
|
+
"8-bit"
|
|
5326
|
+
],
|
|
5327
|
+
"model_id": "mlx-community/Yi-1.5-9B-Chat-8bit",
|
|
5328
|
+
"model_revision": "c1f742fcf3683edbe2d2c2fd1ad7ac2bb6c5ca36"
|
|
5329
|
+
},
|
|
5330
|
+
{
|
|
5331
|
+
"model_format": "mlx",
|
|
5332
|
+
"model_size_in_billions": 34,
|
|
5333
|
+
"quantizations": [
|
|
5334
|
+
"4-bit"
|
|
5335
|
+
],
|
|
5336
|
+
"model_id": "mlx-community/Yi-1.5-34B-Chat-4bit",
|
|
5337
|
+
"model_revision": "945e3b306ef37c46ab444fdc857d1f3ea7247374"
|
|
5338
|
+
},
|
|
5339
|
+
{
|
|
5340
|
+
"model_format": "mlx",
|
|
5341
|
+
"model_size_in_billions": 34,
|
|
5342
|
+
"quantizations": [
|
|
5343
|
+
"8-bit"
|
|
5344
|
+
],
|
|
5345
|
+
"model_id": "mlx-community/Yi-1.5-34B-Chat-8bit",
|
|
5346
|
+
"model_revision": "3c12761a2c6663f216caab6dff84b0dd29b472ac"
|
|
5347
|
+
}
|
|
4612
5348
|
],
|
|
4613
5349
|
"prompt_style": {
|
|
4614
5350
|
"style_name": "CHATML",
|
|
@@ -5766,33 +6502,168 @@
|
|
|
5766
6502
|
},
|
|
5767
6503
|
{
|
|
5768
6504
|
"version": 1,
|
|
5769
|
-
"context_length": 4096,
|
|
5770
|
-
"model_name": "Skywork-Math",
|
|
6505
|
+
"context_length": 4096,
|
|
6506
|
+
"model_name": "Skywork-Math",
|
|
6507
|
+
"model_lang": [
|
|
6508
|
+
"en",
|
|
6509
|
+
"zh"
|
|
6510
|
+
],
|
|
6511
|
+
"model_ability": [
|
|
6512
|
+
"generate"
|
|
6513
|
+
],
|
|
6514
|
+
"model_description": "Skywork is a series of large models developed by the Kunlun Group · Skywork team.",
|
|
6515
|
+
"model_specs": [
|
|
6516
|
+
{
|
|
6517
|
+
"model_format": "pytorch",
|
|
6518
|
+
"model_size_in_billions": 13,
|
|
6519
|
+
"quantizations": [
|
|
6520
|
+
"8-bit",
|
|
6521
|
+
"none"
|
|
6522
|
+
],
|
|
6523
|
+
"model_id": "skywork/Skywork-13B-Math",
|
|
6524
|
+
"model_revision": "70d1740208c8ba39f9ba250b22117ec25311ab33"
|
|
6525
|
+
}
|
|
6526
|
+
]
|
|
6527
|
+
},
|
|
6528
|
+
{
|
|
6529
|
+
"version": 1,
|
|
6530
|
+
"context_length": 32768,
|
|
6531
|
+
"model_name": "internlm2-chat",
|
|
6532
|
+
"model_lang": [
|
|
6533
|
+
"en",
|
|
6534
|
+
"zh"
|
|
6535
|
+
],
|
|
6536
|
+
"model_ability": [
|
|
6537
|
+
"chat"
|
|
6538
|
+
],
|
|
6539
|
+
"model_description": "The second generation of the InternLM model, InternLM2.",
|
|
6540
|
+
"model_specs": [
|
|
6541
|
+
{
|
|
6542
|
+
"model_format": "pytorch",
|
|
6543
|
+
"model_size_in_billions": 7,
|
|
6544
|
+
"quantizations": [
|
|
6545
|
+
"none"
|
|
6546
|
+
],
|
|
6547
|
+
"model_id": "internlm/internlm2-chat-7b",
|
|
6548
|
+
"model_revision": "2292b86b21cb856642782cebed0a453997453b1f"
|
|
6549
|
+
},
|
|
6550
|
+
{
|
|
6551
|
+
"model_format": "pytorch",
|
|
6552
|
+
"model_size_in_billions": 20,
|
|
6553
|
+
"quantizations": [
|
|
6554
|
+
"none"
|
|
6555
|
+
],
|
|
6556
|
+
"model_id": "internlm/internlm2-chat-20b",
|
|
6557
|
+
"model_revision": "b666125047cd98c5a7c85ca28720b44a06aed124"
|
|
6558
|
+
}
|
|
6559
|
+
],
|
|
6560
|
+
"prompt_style": {
|
|
6561
|
+
"style_name": "INTERNLM2",
|
|
6562
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
6563
|
+
"roles": [
|
|
6564
|
+
"<|im_start|>user",
|
|
6565
|
+
"<|im_start|>assistant"
|
|
6566
|
+
],
|
|
6567
|
+
"intra_message_sep": "<|im_end|>",
|
|
6568
|
+
"stop_token_ids": [
|
|
6569
|
+
2,
|
|
6570
|
+
92542
|
|
6571
|
+
],
|
|
6572
|
+
"stop": [
|
|
6573
|
+
"</s>",
|
|
6574
|
+
"<|im_end|>"
|
|
6575
|
+
]
|
|
6576
|
+
}
|
|
6577
|
+
},
|
|
6578
|
+
{
|
|
6579
|
+
"version": 1,
|
|
6580
|
+
"context_length": 32768,
|
|
6581
|
+
"model_name": "internlm2.5-chat",
|
|
5771
6582
|
"model_lang": [
|
|
5772
6583
|
"en",
|
|
5773
6584
|
"zh"
|
|
5774
6585
|
],
|
|
5775
6586
|
"model_ability": [
|
|
5776
|
-
"
|
|
6587
|
+
"chat"
|
|
5777
6588
|
],
|
|
5778
|
-
"model_description": "
|
|
6589
|
+
"model_description": "InternLM2.5 series of the InternLM model.",
|
|
5779
6590
|
"model_specs": [
|
|
5780
6591
|
{
|
|
5781
6592
|
"model_format": "pytorch",
|
|
5782
|
-
"model_size_in_billions":
|
|
6593
|
+
"model_size_in_billions": 7,
|
|
5783
6594
|
"quantizations": [
|
|
5784
|
-
"8-bit",
|
|
5785
6595
|
"none"
|
|
5786
6596
|
],
|
|
5787
|
-
"model_id": "
|
|
5788
|
-
"model_revision": "
|
|
6597
|
+
"model_id": "internlm/internlm2_5-7b-chat",
|
|
6598
|
+
"model_revision": "9dc8536a922ab4954726aad1b37fa199004a291a"
|
|
6599
|
+
},
|
|
6600
|
+
{
|
|
6601
|
+
"model_format": "gptq",
|
|
6602
|
+
"model_size_in_billions": 7,
|
|
6603
|
+
"quantizations": [
|
|
6604
|
+
"Int4"
|
|
6605
|
+
],
|
|
6606
|
+
"model_id": "ModelCloud/internlm-2.5-7b-chat-gptq-4bit",
|
|
6607
|
+
"model_revision": "2e2dda735c326544921a4035bbeb6c6e316a8254"
|
|
6608
|
+
},
|
|
6609
|
+
{
|
|
6610
|
+
"model_format": "ggufv2",
|
|
6611
|
+
"model_size_in_billions": 7,
|
|
6612
|
+
"quantizations": [
|
|
6613
|
+
"q2_k",
|
|
6614
|
+
"q3_k_m",
|
|
6615
|
+
"q4_0",
|
|
6616
|
+
"q4_k_m",
|
|
6617
|
+
"q5_0",
|
|
6618
|
+
"q5_k_m",
|
|
6619
|
+
"q6_k",
|
|
6620
|
+
"q8_0",
|
|
6621
|
+
"fp16"
|
|
6622
|
+
],
|
|
6623
|
+
"model_id": "internlm/internlm2_5-7b-chat-gguf",
|
|
6624
|
+
"model_file_name_template": "internlm2_5-7b-chat-{quantization}.gguf"
|
|
6625
|
+
},
|
|
6626
|
+
{
|
|
6627
|
+
"model_format": "mlx",
|
|
6628
|
+
"model_size_in_billions": 7,
|
|
6629
|
+
"quantizations": [
|
|
6630
|
+
"4-bit"
|
|
6631
|
+
],
|
|
6632
|
+
"model_id": "mlx-community/internlm2_5-7b-chat-4bit",
|
|
6633
|
+
"model_revision": "d12097a867721978142a6048399f470a3d18beee"
|
|
6634
|
+
},
|
|
6635
|
+
{
|
|
6636
|
+
"model_format": "mlx",
|
|
6637
|
+
"model_size_in_billions": 7,
|
|
6638
|
+
"quantizations": [
|
|
6639
|
+
"8-bit"
|
|
6640
|
+
],
|
|
6641
|
+
"model_id": "mlx-community/internlm2_5-7b-chat-8bit",
|
|
6642
|
+
"model_revision": "0ec94d61d30ab161b49c69f9bf92ec2b9986d234"
|
|
5789
6643
|
}
|
|
5790
|
-
]
|
|
6644
|
+
],
|
|
6645
|
+
"prompt_style": {
|
|
6646
|
+
"style_name": "INTERNLM2",
|
|
6647
|
+
"system_prompt": "You are InternLM (书生·浦语), a helpful, honest, and harmless AI assistant developed by Shanghai AI Laboratory (上海人工智能实验室).",
|
|
6648
|
+
"roles": [
|
|
6649
|
+
"<|im_start|>user",
|
|
6650
|
+
"<|im_start|>assistant"
|
|
6651
|
+
],
|
|
6652
|
+
"intra_message_sep": "<|im_end|>",
|
|
6653
|
+
"stop_token_ids": [
|
|
6654
|
+
2,
|
|
6655
|
+
92542
|
|
6656
|
+
],
|
|
6657
|
+
"stop": [
|
|
6658
|
+
"</s>",
|
|
6659
|
+
"<|im_end|>"
|
|
6660
|
+
]
|
|
6661
|
+
}
|
|
5791
6662
|
},
|
|
5792
6663
|
{
|
|
5793
6664
|
"version": 1,
|
|
5794
|
-
"context_length":
|
|
5795
|
-
"model_name": "internlm2-chat",
|
|
6665
|
+
"context_length": 262144,
|
|
6666
|
+
"model_name": "internlm2.5-chat-1m",
|
|
5796
6667
|
"model_lang": [
|
|
5797
6668
|
"en",
|
|
5798
6669
|
"zh"
|
|
@@ -5800,7 +6671,7 @@
|
|
|
5800
6671
|
"model_ability": [
|
|
5801
6672
|
"chat"
|
|
5802
6673
|
],
|
|
5803
|
-
"model_description": "
|
|
6674
|
+
"model_description": "InternLM2.5 series of the InternLM model supports 1M long-context",
|
|
5804
6675
|
"model_specs": [
|
|
5805
6676
|
{
|
|
5806
6677
|
"model_format": "pytorch",
|
|
@@ -5808,17 +6679,34 @@
|
|
|
5808
6679
|
"quantizations": [
|
|
5809
6680
|
"none"
|
|
5810
6681
|
],
|
|
5811
|
-
"model_id": "internlm/
|
|
5812
|
-
"model_revision": "
|
|
6682
|
+
"model_id": "internlm/internlm2_5-7b-chat-1m",
|
|
6683
|
+
"model_revision": "8d1a709a04d71440ef3df6ebbe204672f411c8b6"
|
|
5813
6684
|
},
|
|
5814
6685
|
{
|
|
5815
|
-
"model_format": "
|
|
5816
|
-
"model_size_in_billions":
|
|
6686
|
+
"model_format": "gptq",
|
|
6687
|
+
"model_size_in_billions": 7,
|
|
5817
6688
|
"quantizations": [
|
|
5818
|
-
"
|
|
6689
|
+
"Int4"
|
|
5819
6690
|
],
|
|
5820
|
-
"model_id": "internlm
|
|
5821
|
-
"model_revision": "
|
|
6691
|
+
"model_id": "ModelCloud/internlm-2.5-7b-chat-1m-gptq-4bit",
|
|
6692
|
+
"model_revision": "022e59cb30f03b271d56178478acb038b2b9b58c"
|
|
6693
|
+
},
|
|
6694
|
+
{
|
|
6695
|
+
"model_format": "ggufv2",
|
|
6696
|
+
"model_size_in_billions": 7,
|
|
6697
|
+
"quantizations": [
|
|
6698
|
+
"q2_k",
|
|
6699
|
+
"q3_k_m",
|
|
6700
|
+
"q4_0",
|
|
6701
|
+
"q4_k_m",
|
|
6702
|
+
"q5_0",
|
|
6703
|
+
"q5_k_m",
|
|
6704
|
+
"q6_k",
|
|
6705
|
+
"q8_0",
|
|
6706
|
+
"fp16"
|
|
6707
|
+
],
|
|
6708
|
+
"model_id": "internlm/internlm2_5-7b-chat-1m-gguf",
|
|
6709
|
+
"model_file_name_template": "internlm2_5-7b-chat-1m-{quantization}.gguf"
|
|
5822
6710
|
}
|
|
5823
6711
|
],
|
|
5824
6712
|
"prompt_style": {
|
|
@@ -6192,6 +7080,52 @@
|
|
|
6192
7080
|
],
|
|
6193
7081
|
"model_id": "google/gemma-2-27b-it"
|
|
6194
7082
|
},
|
|
7083
|
+
{
|
|
7084
|
+
"model_format": "ggufv2",
|
|
7085
|
+
"model_size_in_billions": 9,
|
|
7086
|
+
"quantizations": [
|
|
7087
|
+
"Q2_K",
|
|
7088
|
+
"Q2_K_L",
|
|
7089
|
+
"Q3_K_L",
|
|
7090
|
+
"Q3_K_M",
|
|
7091
|
+
"Q3_K_S",
|
|
7092
|
+
"Q4_K_L",
|
|
7093
|
+
"Q4_K_M",
|
|
7094
|
+
"Q4_K_S",
|
|
7095
|
+
"Q5_K_L",
|
|
7096
|
+
"Q5_K_M",
|
|
7097
|
+
"Q5_K_S",
|
|
7098
|
+
"Q6_K",
|
|
7099
|
+
"Q6_K_L",
|
|
7100
|
+
"Q8_0",
|
|
7101
|
+
"f32"
|
|
7102
|
+
],
|
|
7103
|
+
"model_id": "bartowski/gemma-2-9b-it-GGUF",
|
|
7104
|
+
"model_file_name_template": "gemma-2-9b-it-{quantization}.gguf"
|
|
7105
|
+
},
|
|
7106
|
+
{
|
|
7107
|
+
"model_format": "ggufv2",
|
|
7108
|
+
"model_size_in_billions": 27,
|
|
7109
|
+
"quantizations": [
|
|
7110
|
+
"Q2_K",
|
|
7111
|
+
"Q2_K_L",
|
|
7112
|
+
"Q3_K_L",
|
|
7113
|
+
"Q3_K_M",
|
|
7114
|
+
"Q3_K_S",
|
|
7115
|
+
"Q4_K_L",
|
|
7116
|
+
"Q4_K_M",
|
|
7117
|
+
"Q4_K_S",
|
|
7118
|
+
"Q5_K_L",
|
|
7119
|
+
"Q5_K_M",
|
|
7120
|
+
"Q5_K_S",
|
|
7121
|
+
"Q6_K",
|
|
7122
|
+
"Q6_K_L",
|
|
7123
|
+
"Q8_0",
|
|
7124
|
+
"f32"
|
|
7125
|
+
],
|
|
7126
|
+
"model_id": "bartowski/gemma-2-27b-it-GGUF",
|
|
7127
|
+
"model_file_name_template": "gemma-2-27b-it-{quantization}.gguf"
|
|
7128
|
+
},
|
|
6195
7129
|
{
|
|
6196
7130
|
"model_format": "mlx",
|
|
6197
7131
|
"model_size_in_billions": 9,
|
|
@@ -6809,6 +7743,15 @@
|
|
|
6809
7743
|
"model_id": "CohereForAI/c4ai-command-r-v01",
|
|
6810
7744
|
"model_revision": "16881ccde1c68bbc7041280e6a66637bc46bfe88"
|
|
6811
7745
|
},
|
|
7746
|
+
{
|
|
7747
|
+
"model_format": "pytorch",
|
|
7748
|
+
"model_size_in_billions": 35,
|
|
7749
|
+
"quantizations": [
|
|
7750
|
+
"4-bit"
|
|
7751
|
+
],
|
|
7752
|
+
"model_id": "CohereForAI/c4ai-command-r-v01-4bit",
|
|
7753
|
+
"model_revision": "f2e87936a146643c9dd143422dcafb9cb1552611"
|
|
7754
|
+
},
|
|
6812
7755
|
{
|
|
6813
7756
|
"model_format": "ggufv2",
|
|
6814
7757
|
"model_size_in_billions": 35,
|
|
@@ -6838,69 +7781,23 @@
|
|
|
6838
7781
|
"model_id": "CohereForAI/c4ai-command-r-plus",
|
|
6839
7782
|
"model_revision": "ba7f1d954c9d1609013677d87e4142ab95c34e62"
|
|
6840
7783
|
},
|
|
6841
|
-
{
|
|
6842
|
-
"model_format": "gptq",
|
|
6843
|
-
"model_size_in_billions": 104,
|
|
6844
|
-
"quantizations": [
|
|
6845
|
-
"Int4"
|
|
6846
|
-
],
|
|
6847
|
-
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
|
|
6848
|
-
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
|
|
6849
|
-
}
|
|
6850
|
-
],
|
|
6851
|
-
"prompt_style": {
|
|
6852
|
-
"style_name": "c4ai-command-r",
|
|
6853
|
-
"system_prompt": "You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.",
|
|
6854
|
-
"roles": [
|
|
6855
|
-
"<|USER_TOKEN|>",
|
|
6856
|
-
"<|CHATBOT_TOKEN|>"
|
|
6857
|
-
],
|
|
6858
|
-
"intra_message_sep": "",
|
|
6859
|
-
"inter_message_sep": "<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|>",
|
|
6860
|
-
"stop_token_ids": [
|
|
6861
|
-
6,
|
|
6862
|
-
255001
|
|
6863
|
-
]
|
|
6864
|
-
}
|
|
6865
|
-
},
|
|
6866
|
-
{
|
|
6867
|
-
"version": 1,
|
|
6868
|
-
"context_length": 131072,
|
|
6869
|
-
"model_name": "c4ai-command-r-v01-4bit",
|
|
6870
|
-
"model_lang": [
|
|
6871
|
-
"en",
|
|
6872
|
-
"fr",
|
|
6873
|
-
"de",
|
|
6874
|
-
"es",
|
|
6875
|
-
"it",
|
|
6876
|
-
"pt",
|
|
6877
|
-
"ja",
|
|
6878
|
-
"ko",
|
|
6879
|
-
"zh",
|
|
6880
|
-
"ar"
|
|
6881
|
-
],
|
|
6882
|
-
"model_ability": [
|
|
6883
|
-
"generate"
|
|
6884
|
-
],
|
|
6885
|
-
"model_description": "This model is 4bit quantized version of C4AI Command-R using bitsandbytes.",
|
|
6886
|
-
"model_specs": [
|
|
6887
7784
|
{
|
|
6888
7785
|
"model_format": "pytorch",
|
|
6889
|
-
"model_size_in_billions":
|
|
7786
|
+
"model_size_in_billions": 104,
|
|
6890
7787
|
"quantizations": [
|
|
6891
|
-
"
|
|
7788
|
+
"4-bit"
|
|
6892
7789
|
],
|
|
6893
|
-
"model_id": "CohereForAI/c4ai-command-r-
|
|
6894
|
-
"model_revision": "
|
|
7790
|
+
"model_id": "CohereForAI/c4ai-command-r-plus-4bit",
|
|
7791
|
+
"model_revision": "bb63b5b7005ecedb30b0cfd0d5953b02a5817f7b"
|
|
6895
7792
|
},
|
|
6896
7793
|
{
|
|
6897
|
-
"model_format": "
|
|
7794
|
+
"model_format": "gptq",
|
|
6898
7795
|
"model_size_in_billions": 104,
|
|
6899
7796
|
"quantizations": [
|
|
6900
|
-
"
|
|
7797
|
+
"Int4"
|
|
6901
7798
|
],
|
|
6902
|
-
"model_id": "
|
|
6903
|
-
"model_revision": "
|
|
7799
|
+
"model_id": "alpindale/c4ai-command-r-plus-GPTQ",
|
|
7800
|
+
"model_revision": "35febfc08f723ac0df32480eb4af349a7d08656e"
|
|
6904
7801
|
}
|
|
6905
7802
|
],
|
|
6906
7803
|
"prompt_style": {
|
|
@@ -7148,5 +8045,67 @@
|
|
|
7148
8045
|
160132
|
|
7149
8046
|
]
|
|
7150
8047
|
}
|
|
8048
|
+
},
|
|
8049
|
+
{
|
|
8050
|
+
"version": 1,
|
|
8051
|
+
"context_length": 32768,
|
|
8052
|
+
"model_name": "csg-wukong-chat-v0.1",
|
|
8053
|
+
"model_lang": [
|
|
8054
|
+
"en"
|
|
8055
|
+
],
|
|
8056
|
+
"model_ability": [
|
|
8057
|
+
"chat"
|
|
8058
|
+
],
|
|
8059
|
+
"model_description": "csg-wukong-1B is a 1 billion-parameter small language model(SLM) pretrained on 1T tokens.",
|
|
8060
|
+
"model_specs": [
|
|
8061
|
+
{
|
|
8062
|
+
"model_format": "pytorch",
|
|
8063
|
+
"model_size_in_billions": 1,
|
|
8064
|
+
"quantizations": [
|
|
8065
|
+
"none"
|
|
8066
|
+
],
|
|
8067
|
+
"model_id": "opencsg/csg-wukong-1B-chat-v0.1",
|
|
8068
|
+
"model_revision": "2443c903d46074af0856e2ba11398dcd01d35536"
|
|
8069
|
+
},
|
|
8070
|
+
{
|
|
8071
|
+
"model_format": "ggufv2",
|
|
8072
|
+
"model_size_in_billions": 1,
|
|
8073
|
+
"quantizations": [
|
|
8074
|
+
"Q2_K",
|
|
8075
|
+
"Q3_K",
|
|
8076
|
+
"Q3_K_S",
|
|
8077
|
+
"Q3_K_M",
|
|
8078
|
+
"Q3_K_L",
|
|
8079
|
+
"Q4_0",
|
|
8080
|
+
"Q4_1",
|
|
8081
|
+
"Q4_K_S",
|
|
8082
|
+
"Q4_K_M",
|
|
8083
|
+
"Q5_0",
|
|
8084
|
+
"Q5_1",
|
|
8085
|
+
"Q5_K_S",
|
|
8086
|
+
"Q5_K_M",
|
|
8087
|
+
"Q6_K",
|
|
8088
|
+
"Q8_0"
|
|
8089
|
+
],
|
|
8090
|
+
"model_id": "RichardErkhov/opencsg_-_csg-wukong-1B-chat-v0.1-gguf",
|
|
8091
|
+
"model_file_name_template": "csg-wukong-1B-chat-v0.1.{quantization}.gguf"
|
|
8092
|
+
}
|
|
8093
|
+
],
|
|
8094
|
+
"prompt_style": {
|
|
8095
|
+
"style_name": "NO_COLON_TWO",
|
|
8096
|
+
"system_prompt": "<|system|>\nYou are a creative super artificial intelligence assistant, possessing all the knowledge of humankind. Your name is csg-wukong, developed by OpenCSG. You need to understand and infer the true intentions of users based on the topics discussed in the chat history, and respond to user questions correctly as required. You enjoy responding to users with accurate and insightful answers. Please pay attention to the appropriate style and format when replying, try to avoid repetitive words and sentences, and keep your responses as concise and profound as possible. You carefully consider the context of the discussion when replying to users. When the user says \"continue,\" please proceed with the continuation of the previous assistant's response.</s>\n",
|
|
8097
|
+
"roles": [
|
|
8098
|
+
"<|user|>\n",
|
|
8099
|
+
"<|assistant|>\n"
|
|
8100
|
+
],
|
|
8101
|
+
"intra_message_sep": "</s>\n",
|
|
8102
|
+
"inter_message_sep": "</s>\n",
|
|
8103
|
+
"stop_token_ids": [
|
|
8104
|
+
2
|
|
8105
|
+
],
|
|
8106
|
+
"stop": [
|
|
8107
|
+
"</s>"
|
|
8108
|
+
]
|
|
8109
|
+
}
|
|
7151
8110
|
}
|
|
7152
8111
|
]
|